Gene Ontology Analysis analysis of LCM RNA Data

Managing Packages Using Renv

To run this code in my project using the renv environment, run the following lines of code

install.packages("renv") #install the package on the new computer (may not be necessary if renv bootstraps itself as expected)
renv::restore() #reinstall all the package versions in the renv lockfile

Load packages

library("ViSEAGO")
## 
## Warning: replacing previous import 'data.table::set' by 'dendextend::set' when
## loading 'ViSEAGO'
## Warning: replacing previous import 'dendextend::cutree' by 'stats::cutree' when
## loading 'ViSEAGO'
## Warning: replacing previous import 'AnnotationDbi::tail' by 'utils::tail' when
## loading 'ViSEAGO'
require("topGO")
## Loading required package: topGO
## Loading required package: BiocGenerics
## Loading required package: generics
## 
## Attaching package: 'generics'
## The following objects are masked from 'package:base':
## 
##     as.difftime, as.factor, as.ordered, intersect, is.element, setdiff,
##     setequal, union
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, saveRDS, table, tapply, unique,
##     unsplit, which.max, which.min
## Loading required package: graph
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: GO.db
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: SparseM
## 
## groupGOTerms:    GOBPTerm, GOMFTerm, GOCCTerm environments built.
## 
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
## 
##     members
require("tidyverse")
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ ggplot2::annotate()   masks ViSEAGO::annotate()
## ✖ stringr::boundary()   masks graph::boundary()
## ✖ dplyr::collapse()     masks IRanges::collapse()
## ✖ dplyr::combine()      masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::desc()         masks IRanges::desc()
## ✖ tidyr::expand()       masks S4Vectors::expand()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ dplyr::first()        masks S4Vectors::first()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ ggplot2::Position()   masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()       masks IRanges::reduce()
## ✖ dplyr::rename()       masks S4Vectors::rename()
## ✖ lubridate::second()   masks S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::select()       masks AnnotationDbi::select()
## ✖ dplyr::slice()        masks IRanges::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#BiocManager::install("GO.db")

sessionInfo() #provides list of loaded packages and version of R.
## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.0.1
## 
## Matrix products: default
## BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices datasets  utils     methods  
## [8] base     
## 
## other attached packages:
##  [1] lubridate_1.9.4      forcats_1.0.1        stringr_1.6.0       
##  [4] dplyr_1.1.4          purrr_1.2.0          readr_2.1.5         
##  [7] tidyr_1.3.1          tibble_3.3.0         ggplot2_4.0.0       
## [10] tidyverse_2.0.0      topGO_2.62.0         SparseM_1.84-2      
## [13] GO.db_3.22.0         AnnotationDbi_1.72.0 IRanges_2.44.0      
## [16] S4Vectors_0.48.0     Biobase_2.70.0       graph_1.88.0        
## [19] BiocGenerics_0.56.0  generics_0.1.4       ViSEAGO_1.24.0      
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3     rstudioapi_0.17.1      jsonlite_2.0.0        
##   [4] shape_1.4.6.1          magrittr_2.0.4         farver_2.1.2          
##   [7] rmarkdown_2.30         GlobalOptions_0.1.2    fs_1.6.6              
##  [10] vctrs_0.6.5            memoise_2.0.1          RCurl_1.98-1.17       
##  [13] webshot_0.5.5          htmltools_0.5.8.1      progress_1.2.3        
##  [16] dynamicTreeCut_1.63-1  curl_7.0.0             sass_0.4.10           
##  [19] bslib_0.9.0            htmlwidgets_1.6.4      plyr_1.8.9            
##  [22] httr2_1.2.1            plotly_4.11.0          cachem_1.1.0          
##  [25] igraph_2.2.1           lifecycle_1.0.4        iterators_1.0.14      
##  [28] pkgconfig_2.0.3        Matrix_1.7-4           R6_2.6.1              
##  [31] fastmap_1.2.0          clue_0.3-66            digest_0.6.37         
##  [34] colorspace_2.1-2       RSQLite_2.4.3          seriation_1.5.8       
##  [37] filelock_1.0.3         timechange_0.3.0       httr_1.4.7            
##  [40] compiler_4.5.2         bit64_4.6.0-1          withr_3.0.2           
##  [43] doParallel_1.0.17      S7_0.2.0               BiocParallel_1.44.0   
##  [46] viridis_0.6.5          DBI_1.2.3              UpSetR_1.4.0          
##  [49] heatmaply_1.6.0        dendextend_1.19.1      R.utils_2.13.0        
##  [52] biomaRt_2.66.0         rappdirs_0.3.3         rjson_0.2.23          
##  [55] tools_4.5.2            R.oo_1.27.1            glue_1.8.0            
##  [58] DiagrammeR_1.0.11      GOSemSim_2.36.0        grid_4.5.2            
##  [61] cluster_2.1.8.1        fgsea_1.36.0           gtable_0.3.6          
##  [64] tzdb_0.5.0             R.methodsS3_1.8.2      ca_0.71.1             
##  [67] data.table_1.17.8      hms_1.1.4              XVector_0.50.0        
##  [70] foreach_1.5.2          pillar_1.11.1          yulab.utils_0.2.1     
##  [73] circlize_0.4.16        BiocFileCache_3.0.0    lattice_0.22-7        
##  [76] renv_1.1.5             bit_4.6.0              tidyselect_1.2.1      
##  [79] registry_0.5-1         ComplexHeatmap_2.26.0  Biostrings_2.78.0     
##  [82] knitr_1.50             gridExtra_2.3          Seqinfo_1.0.0         
##  [85] xfun_0.54              matrixStats_1.5.0      DT_0.34.0             
##  [88] visNetwork_2.1.4       stringi_1.8.7          lazyeval_0.2.2        
##  [91] yaml_2.3.10            evaluate_1.0.5         codetools_0.2-20      
##  [94] BiocManager_1.30.26    cli_3.6.5              jquerylib_0.1.4       
##  [97] dichromat_2.0-0.1      Rcpp_1.1.0             dbplyr_2.5.1          
## [100] png_0.1-8              XML_3.99-0.19          parallel_4.5.2        
## [103] assertthat_0.2.1       blob_1.2.4             prettyunits_1.2.0     
## [106] AnnotationForge_1.52.0 bitops_1.0-9           viridisLite_0.4.2     
## [109] scales_1.4.0           crayon_1.5.3           GetoptLong_1.0.5      
## [112] rlang_1.1.6            cowplot_1.2.0          fastmatch_1.1-6       
## [115] KEGGREST_1.50.0        TSP_1.2-5

Description of pipeline

I am going to perform functional enrichment of GO terms using ViSEAGO.

I am following this vignette: http://bioconductor.unipi.it/packages/devel/bioc/vignettes/ViSEAGO/inst/doc/ViSEAGO.html.

Load in reference files and differential expression data

In the next chunk I am loading in my DESeq data. These results are ordered by adjusted p-value. As a reminder, negative LFC = higher in Oral tissue, and positive LFC = higher in Aboral tissue.

#load in DESeq results
DESeq <- read.csv("../output_RNA/differential_expression/DESeq_results.csv", header = TRUE) %>% dplyr::rename("query" ="X")

#make dataframes of just differentially expressed genes for each LFC direction - filtering a little more stringent, abs(LFC) >2
DE_05_Aboral <- DESeq %>% filter(padj < 0.05 & log2FoldChange < -1)
DE_05_OralEpi <- DESeq %>% filter(padj < 0.05& log2FoldChange > 1)

#load in annotation data 
annot_tab <- read.delim("../references/annotation/protein-GO.tsv") %>% dplyr::rename(GOs = GeneOntologyIDs)

#filter annotation data for just expressed genes with GO annotations
annot_tab <- annot_tab %>% filter(query %in% DESeq$query) 

annot_tab$GOs <- gsub("; ", ";", annot_tab$GOs)
annot_tab$GOs[annot_tab$GOs==""] <- NA
annot_tab <- annot_tab %>% filter(!is.na(GOs))

nrow(annot_tab)
## [1] 10638
nrow(annot_tab)/nrow(DESeq)
## [1] 0.7354812

10638/14464 genes in our dataset have GO information in this file. That is 74%.

sum(annot_tab$query %in% DE_05_Aboral$query)
## [1] 376
sum(annot_tab$query %in% DE_05_Aboral$query)/nrow(DE_05_Aboral)
## [1] 0.6811594
sum(annot_tab$query %in% DE_05_OralEpi$query)
## [1] 826
sum(annot_tab$query %in% DE_05_OralEpi$query)/nrow(DE_05_OralEpi)
## [1] 0.6592179

379/558 genes that are significantly upregulated in the Aboral tissue have annotation information. That is 68% of the genes.

796/1216 genes that are significantly upregulated in the Oral Epidermis tissue have annotation information. That is 71% of the genes.

Create custom GO annotation file for ViSEAGO

##Get a list of GO Terms for all genes
annots <- annot_tab %>% dplyr::select(query,GOs,ProteinNames) %>% dplyr::rename("GO.terms" = GOs)

# format into the format required by ViSEAGO for custom mappings
Custom_GOs <- annots %>%
  # Separate GO terms into individual rows
  separate_rows(GO.terms, sep = ";") %>%
  # Add necessary columns
  mutate(
    taxid = "pacuta",
    gene_symbol = ProteinNames,
    evidence = "SwissProt"
  ) %>%
  # Rename columns
  dplyr::rename(
    gene_id = query,
    GOID = GO.terms
  ) %>%
  dplyr::select(taxid, gene_id, gene_symbol, GOID, evidence)

Custom_GOs_valid <- Custom_GOs %>% filter(GOID %in% keys(GO.db))

write.table(Custom_GOs_valid, "../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt",row.names = FALSE, sep = "\t", quote = FALSE,col.names=TRUE)

length(unique(Custom_GOs$gene_id))
## [1] 10638
length(unique(Custom_GOs_valid$gene_id))
## [1] 10637

We seem to have lost one gene when filtering for valid GO terms, so I need to account for that below.

load the file into ViSEAGO

Custom_Pacuta <- ViSEAGO::Custom2GO("../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt")
## 'select()' returned 1:1 mapping between keys and columns
myGENE2GO_Pacuta <- ViSEAGO::annotate(
    id="pacuta",
    Custom_Pacuta
)

Create gene lists for enrichment

selection <- DESeq %>% filter(query %in% Custom_GOs_valid$gene_id) %>% 
                       mutate(DE_05_Aboral = ifelse(query %in% DE_05_Aboral$query, 1,0)) %>%
                       mutate(DE_05_Oral = ifelse(query %in% DE_05_OralEpi$query, 1,0)) %>%
                       mutate(expressed = 1)

selection_Aboral <- selection %>% pull(DE_05_Aboral) %>% as.factor()
names(selection_Aboral) <- selection %>% pull(query)

selection_Oral <- selection %>% pull(DE_05_Oral) %>% as.factor()
names(selection_Oral) <- selection %>% pull(query)

expressed <- selection %>% pull(expressed) %>% as.factor()
names(expressed) <- selection %>% pull(query)

BP

Oral Epidermis:

create topGO objects and perform enrichment using topGO wrapped by ViSEAGO

# create viseago object
selection <- names(selection_Oral)[selection_Oral==1]
background <- names(expressed)

BP_Oral <- ViSEAGO::create_topGOdata(
    geneSel=selection,
    allGenes=background,
    gene2GO=myGENE2GO_Pacuta, 
    ont="BP",
    nodeSize=5
)
## 
## Building most specific GOs .....
##  ( 8797 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 12496 GO terms and 27095 relations. )
## 
## Annotating nodes ...............
##  ( 9496 genes annotated to the GO terms. )
# perform TopGO test using elim algorithm
elim_Oral <- topGO::runTest(
    BP_Oral,
    algorithm ="elim",
    statistic = "fisher"
)
## 
##           -- Elim Algorithm -- 
## 
##       the algorithm is scoring 4469 nontrivial nodes
##       parameters: 
##           test statistic: fisher
##           cutOff: 0.01
## 
##   Level 19:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 18:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  8 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  18 nodes to be scored   (0 eliminated genes)
## 
##   Level 15:  39 nodes to be scored   (0 eliminated genes)
## 
##   Level 14:  42 nodes to be scored   (61 eliminated genes)
## 
##   Level 13:  70 nodes to be scored   (68 eliminated genes)
## 
##   Level 12:  158 nodes to be scored  (96 eliminated genes)
## 
##   Level 11:  312 nodes to be scored  (115 eliminated genes)
## 
##   Level 10:  469 nodes to be scored  (382 eliminated genes)
## 
##   Level 9:   592 nodes to be scored  (540 eliminated genes)
## 
##   Level 8:   646 nodes to be scored  (699 eliminated genes)
## 
##   Level 7:   689 nodes to be scored  (979 eliminated genes)
## 
##   Level 6:   638 nodes to be scored  (1207 eliminated genes)
## 
##   Level 5:   444 nodes to be scored  (1477 eliminated genes)
## 
##   Level 4:   244 nodes to be scored  (1582 eliminated genes)
## 
##   Level 3:   79 nodes to be scored   (1626 eliminated genes)
## 
##   Level 2:   16 nodes to be scored   (1626 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (1626 eliminated genes)
BP_Results <- ViSEAGO::merge_enrich_terms(
    Input = list( Oral_elim = c("BP_Oral", "elim_Oral")))
## 'select()' returned 1:1 mapping between keys and columns

Visualize and save initial results

# display the merged table
BP_Results
## - object class: enrich_GO_terms
## - ontology: BP
## - method: topGO
## - summary:
##  Oral_elim
##       BP_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 826
##         feasible_genes: 9496
##         feasible_genes_significant: 712
##         genes_nodeSize: 5
##         nodes_number: 6675
##         edges_number: 14047
##       elim_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 6675
##         GO_significant: 67
##         feasible_genes: 9496
##         feasible_genes_significant: 712
##         genes_nodeSize: 5
##         Nontrivial_nodes: 4469 
##  - enrichment pvalue cutoff:
##         Oral_elim : 0.01
## - enrich GOs (in at least one list): 67 GO terms of 1 conditions.
##         Oral_elim : 67 terms
ViSEAGO::show_table(BP_Results)
# print the merged table in a file
ViSEAGO::show_table(
    BP_Results,
    "../output_RNA/differential_expression/semantic-enrichment/DE_05_Oral.tsv"
)
  • enrichment pvalue cutoff: Oral_elim : 0.01
  • enrich GOs (in at least one list): Oral_elim : 67 terms

Semantic similarity –> Cluster: Use Wang graph-based method to identify similarity between GO terms

# initialize 
myGOs<-ViSEAGO::build_GO_SS(
    gene2GO=myGENE2GO_Pacuta,
    enrich_GO_terms=BP_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
    myGOs,
   distance=c("Wang")
)

myGOs
## - object class: GO_SS
## - ontology: BP
## - method: topGO
## - summary:
## Oral_elim
##       BP_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 826
##         feasible_genes: 9496
##         feasible_genes_significant: 712
##         genes_nodeSize: 5
##         nodes_number: 6675
##         edges_number: 14047
##       elim_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 6675
##         GO_significant: 67
##         feasible_genes: 9496
##         feasible_genes_significant: 712
##         genes_nodeSize: 5
##         Nontrivial_nodes: 4469 
##  - enrichment pvalue cutoff:
##         Oral_elim : 0.01
## - enrich GOs (in at least one list): 67 GO terms of 1 conditions.
##         Oral_elim : 67 terms
## - terms distances:  Wang

Multi Dimensional Scaling Plot of the GO-terms based on semantic similarity

# display MDSplot
ViSEAGO::MDSplot(myGOs,
                 "GOterms")

Heatmap + table of GO terms, with clusters aggregated by hclust with ward.D2 method

# GOterms heatmap with the default parameters
Wang_clusters_wardD2_Oral <- ViSEAGO::GOterms_heatmap(
  myGOs,
  showIC = TRUE,
  showGOlabels = TRUE,
  GO.tree = list(
    tree = list(distance = "Wang", aggreg.method = "ward.D2"),
    cut = list(
      dynamic = list(
        pamStage = TRUE,
        pamRespectsDendro = TRUE,
        deepSplit = 2,
        minClusterSize = 2
      )
    )
  ),
  samples.tree = NULL
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the dendextend package.
##   Please report the issue at <https://github.com/talgalili/dendextend/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2_Oral,
    "GOterms")
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2_Oral,
    height=1000,
    width=700,
    "GOterms", file="../output_RNA/differential_expression/semantic-enrichment/DE_05_Oral_cluster_heatmap_Wang_wardD2.png")
## quartz_off_screen 
##                 2
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2_Oral)
# Print the clusters-heatmap table
ViSEAGO::show_table(
    Wang_clusters_wardD2_Oral,
    "../output_RNA/differential_expression/semantic-enrichment/DE_05_Oral_cluster_heatmap_Wang_wardD2.tsv"
)

Multi Dimensional Scaling Plot of the enriched GO-terms based on semantic similarity, colored by cluster

# display colored MDSplot
ViSEAGO::MDSplot(
    Wang_clusters_wardD2_Oral,
    "GOterms")

Further summarizing the Semantic Similarity-based clusters

# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2_Oral<-ViSEAGO::compute_SS_distances(
    Wang_clusters_wardD2_Oral,
    distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned 1:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
    Wang_clusters_wardD2_Oral,
    "GOclusters")
# GOclusters heatmap
Wang_clusters_wardD2_Oral <-ViSEAGO::GOclusters_heatmap(
    Wang_clusters_wardD2_Oral,
    tree=list(
        distance="BMA",
        aggreg.method="ward.D2"
    )
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2_Oral,
    "GOclusters")
DE_05_SwissProt <- read.csv("../output_RNA/differential_expression/DE_05_SwissProt_annotation.csv")
DESeq_SwissProt <- read.csv("../output_RNA/differential_expression/DESeq_SwissProt_annotation.csv")

Oral_enriched_clustered <- Wang_clusters_wardD2_Oral@enrich_GOs@data

top_cluster <- Oral_enriched_clustered %>% arrange(Oral_elim.pvalue) %>% head(1) %>% pull(GO.cluster)

Oral_enriched_clustered %>% dplyr::filter(GO.cluster == top_cluster)
##     GO.cluster    IC      GO.ID
##         <char> <num>     <char>
##  1:          7  9.45 GO:0006068
##  2:          7  6.37 GO:0009308
##  3:          7  9.11 GO:0009812
##  4:          7  8.86 GO:0051923
##  5:          7  9.11 GO:0050427
##  6:          7  8.11 GO:0009208
##  7:          7  6.36 GO:0017148
##  8:          7  8.11 GO:0016114
##  9:          7  6.43 GO:1901607
## 10:          7  9.27 GO:0046314
##                                                         term
##                                                       <char>
##  1:                                ethanol catabolic process
##  2:                                  amine metabolic process
##  3:                              flavonoid metabolic process
##  4:                                                sulfation
##  5:  3'-phosphoadenosine 5'-phosphosulfate metabolic process
##  6: pyrimidine ribonucleoside triphosphate metabolic process
##  7:                       negative regulation of translation
##  8:                           terpenoid biosynthetic process
##  9:                    alpha-amino acid biosynthetic process
## 10:                     phosphocreatine biosynthetic process
##                                                                                                                                                                                                                                                                                             definition
##                                                                                                                                                                                                                                                                                                 <char>
##  1:                                                                                                                           The chemical reactions and pathways resulting in the breakdown of ethanol, CH3-CH2-OH, a colorless, water-miscible, flammable liquid produced by alcoholic fermentation.
##  2: The chemical reactions and pathways involving any organic compound that is weakly basic in character and contains an amino or a substituted amino group. Amines are called primary, secondary, or tertiary according to whether one, two, or three carbon atoms are attached to the nitrogen atom.
##  3:                                                                                               The chemical reactions and pathways involving flavonoids, a group of water-soluble phenolic derivatives containing a flavan skeleton including flavones, flavonols and flavanoids, and anthocyanins.
##  4:                                                                                                                                                                                                                                                     The addition of a sulfate group to a molecule.
##  5:                                                                          The chemical reactions and pathways involving 3'-phosphoadenosine 5'-phosphosulfate, a naturally occurring mixed anhydride. It is an intermediate in the formation of a variety of sulfo compounds in biological systems.
##  6:                                                                                               The chemical reactions and pathways involving pyrimidine ribonucleoside triphosphate, a compound consisting of a pyrimidine base linked to a ribose sugar esterified with triphosphate on the sugar.
##  7:                                                                                                    Any process that stops, prevents, or reduces the frequency, rate or extent of the chemical reactions and pathways resulting in the formation of proteins by the translation of mRNA or circRNA.
##  8:                                                                                                                                The chemical reactions and pathways resulting in the formation of terpenoids, any member of a class of compounds characterized by an isoprenoid chemical structure.
##  9:                                                                                                                                                                                                                                                                                               <NA>
## 10:                                                                                                                        The chemical reactions and pathways resulting in the formation of phosphocreatine, a phosphagen of creatine which is synthesized and broken down by creatine phosphokinase.
##     Oral_elim.genes_frequency Oral_elim.pvalue Oral_elim.-log10_pvalue
##                        <char>            <num>                   <num>
##  1:                 60% (3/5)     3.742425e-03                    2.43
##  2:           17.188% (11/64)     7.446749e-03                    2.13
##  3:             71.429% (5/7)     4.321628e-05                    4.36
##  4:             44.444% (4/9)     2.909709e-03                    2.54
##  5:             85.714% (6/7)     1.141908e-06                    5.94
##  6:            33.333% (4/12)     9.533181e-03                    2.02
##  7:           15.476% (13/84)     9.327027e-03                    2.03
##  8:            33.333% (4/12)     9.533181e-03                    2.02
##  9:           17.105% (13/76)     3.948213e-03                    2.40
## 10:             66.667% (4/6)     4.159939e-04                    3.38
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               Oral_elim.Significant_genes
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    <char>
##  1:                                                                                                                                                                                                                                                                                                                                                                                                                            Pocillopora_acuta_HIv2___RNAseq.10969_t;Pocillopora_acuta_HIv2___RNAseq.g21538.t1;Pocillopora_acuta_HIv2___RNAseq.g9093.t1
##  2:                                                                               Pocillopora_acuta_HIv2___RNAseq.10969_t;Pocillopora_acuta_HIv2___RNAseq.g11785.t1;Pocillopora_acuta_HIv2___RNAseq.g15141.t1;Pocillopora_acuta_HIv2___RNAseq.g20712.t1;Pocillopora_acuta_HIv2___RNAseq.g21538.t1;Pocillopora_acuta_HIv2___RNAseq.g21957.t1;Pocillopora_acuta_HIv2___RNAseq.g22120.t1;Pocillopora_acuta_HIv2___RNAseq.g26474.t1a;Pocillopora_acuta_HIv2___RNAseq.g27978.t1;Pocillopora_acuta_HIv2___RNAseq.g28398.t1;Pocillopora_acuta_HIv2___TS.g805.t1a
##  3:                                                                                                                                                                                                                                                                                                                                         Pocillopora_acuta_HIv2___RNAseq.10969_t;Pocillopora_acuta_HIv2___RNAseq.g12481.t1;Pocillopora_acuta_HIv2___RNAseq.g21538.t1;Pocillopora_acuta_HIv2___RNAseq.g9089.t1;Pocillopora_acuta_HIv2___RNAseq.g9093.t1
##  4:                                                                                                                                                                                                                                                                                                                                                                                   Pocillopora_acuta_HIv2___RNAseq.10969_t;Pocillopora_acuta_HIv2___RNAseq.g21538.t1;Pocillopora_acuta_HIv2___RNAseq.g9090.t1;Pocillopora_acuta_HIv2___RNAseq.g9093.t1
##  5:                                                                                                                                                                                                                                                                                               Pocillopora_acuta_HIv2___RNAseq.10969_t;Pocillopora_acuta_HIv2___RNAseq.g12481.t1;Pocillopora_acuta_HIv2___RNAseq.g21538.t1;Pocillopora_acuta_HIv2___RNAseq.g25759.t1;Pocillopora_acuta_HIv2___RNAseq.g9089.t1;Pocillopora_acuta_HIv2___RNAseq.g9093.t1
##  6:                                                                                                                                                                                                                                                                                                                                                                                    Pocillopora_acuta_HIv2___RNAseq.g11840.t1;Pocillopora_acuta_HIv2___RNAseq.g21884.t1;Pocillopora_acuta_HIv2___RNAseq.g3911.t1;Pocillopora_acuta_HIv2___TS.g19040.t1
##  7: Pocillopora_acuta_HIv2___RNAseq.g10158.t1;Pocillopora_acuta_HIv2___RNAseq.g1103.t1;Pocillopora_acuta_HIv2___RNAseq.g16905.t1;Pocillopora_acuta_HIv2___RNAseq.g1800.t1;Pocillopora_acuta_HIv2___RNAseq.g18756.t2;Pocillopora_acuta_HIv2___RNAseq.g219.t1;Pocillopora_acuta_HIv2___RNAseq.g24051.t1;Pocillopora_acuta_HIv2___RNAseq.g24787.t1;Pocillopora_acuta_HIv2___RNAseq.g24800.t1;Pocillopora_acuta_HIv2___RNAseq.g28169.t1;Pocillopora_acuta_HIv2___RNAseq.g4938.t1;Pocillopora_acuta_HIv2___RNAseq.g763.t1;Pocillopora_acuta_HIv2___TS.g4312.t1
##  8:                                                                                                                                                                                                                                                                                                                                                                                   Pocillopora_acuta_HIv2___RNAseq.g28425.t1;Pocillopora_acuta_HIv2___RNAseq.g610.t1;Pocillopora_acuta_HIv2___RNAseq.g9824.t1;Pocillopora_acuta_HIv2___RNAseq.g9825.t1
##  9:        Pocillopora_acuta_HIv2___RNAseq.g10854.t2;Pocillopora_acuta_HIv2___RNAseq.g12417.t1;Pocillopora_acuta_HIv2___RNAseq.g1504.t1;Pocillopora_acuta_HIv2___RNAseq.g16699.t1;Pocillopora_acuta_HIv2___RNAseq.g19367.t1;Pocillopora_acuta_HIv2___RNAseq.g23634.t1;Pocillopora_acuta_HIv2___RNAseq.g27978.t1;Pocillopora_acuta_HIv2___RNAseq.g28641.t1;Pocillopora_acuta_HIv2___RNAseq.g7498.t1;Pocillopora_acuta_HIv2___TS.g19253.t1;Pocillopora_acuta_HIv2___TS.g3401.t1;Pocillopora_acuta_HIv2___TS.g6241.t1e;Pocillopora_acuta_HIv2___TS.g7609.t1b
## 10:                                                                                                                                                                                                                                                                                                                                                                                Pocillopora_acuta_HIv2___RNAseq.g15574.t1;Pocillopora_acuta_HIv2___RNAseq.g15577.t1;Pocillopora_acuta_HIv2___RNAseq.g15584.t1;Pocillopora_acuta_HIv2___RNAseq.g2258.t1
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  Oral_elim.Significant_genes_symbol
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              <char>
##  1:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             Sulfotransferase 1B1 (ST1B1) (EC 2.8.2.1) (Sulfotransferase 1B2) (Sulfotransferase family cytosolic 1B member 1) (Thyroid hormone sulfotransferase);Sulfotransferase 1B1 (ST1B1) (EC 2.8.2.1) (Sulfotransferase 1B2) (Sulfotransferase family cytosolic 1B member 1) (Thyroid hormone sulfotransferase);Sulfotransferase 1C4 (ST1C4) (EC 2.8.2.1) (Sulfotransferase 1C2) (SULT1C#2)
##  2:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    Sulfotransferase 1B1 (ST1B1) (EC 2.8.2.1) (Sulfotransferase 1B2) (Sulfotransferase family cytosolic 1B member 1) (Thyroid hormone sulfotransferase);S-adenosylmethionine decarboxylase proenzyme (AdoMetDC) (SAMDC) (EC 4.1.1.50) [Cleaved into: S-adenosylmethionine decarboxylase alpha chain; S-adenosylmethionine decarboxylase beta chain];Cytokinin riboside 5'-monophosphate phosphoribohydrolase LOG3 (EC 3.2.2.n1) (Protein LONELY GUY 3);DBH-like monooxygenase protein 2 homolog (EC 1.14.17.-);Sulfotransferase 1B1 (ST1B1) (EC 2.8.2.1) (Sulfotransferase 1B2) (Sulfotransferase family cytosolic 1B member 1) (Thyroid hormone sulfotransferase);DBH-like monooxygenase protein 1 (EC 1.14.17.-) (Monooxygenase X);DBH-like monooxygenase protein 1 (EC 1.14.17.-) (DBH-related protein) (Monooxygenase X);Ornithine decarboxylase (ODC) (EC 4.1.1.17);Plasma membrane calcium-transporting ATPase 4 (PMCA4) (EC 7.2.2.10) (Matrix-remodeling-associated protein 1) (Plasma membrane calcium ATPase isoform 4) (Plasma membrane calcium pump isoform 4);Synphilin-1 (Sph1) (Alpha-synuclein-interacting protein);Spermidine synthase (SPDSY) (EC 2.5.1.16) (Putrescine aminopropyltransferase)
##  3:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         Sulfotransferase 1B1 (ST1B1) (EC 2.8.2.1) (Sulfotransferase 1B2) (Sulfotransferase family cytosolic 1B member 1) (Thyroid hormone sulfotransferase);Sulfotransferase 1C4 (ST1C4) (EC 2.8.2.1);Sulfotransferase 1B1 (ST1B1) (EC 2.8.2.1) (Sulfotransferase 1B2) (Sulfotransferase family cytosolic 1B member 1) (Thyroid hormone sulfotransferase);Sulfotransferase 1C4 (ST1C4) (EC 2.8.2.1);Sulfotransferase 1C4 (ST1C4) (EC 2.8.2.1) (Sulfotransferase 1C2) (SULT1C#2)
##  4:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               Sulfotransferase 1B1 (ST1B1) (EC 2.8.2.1) (Sulfotransferase 1B2) (Sulfotransferase family cytosolic 1B member 1) (Thyroid hormone sulfotransferase);Sulfotransferase 1B1 (ST1B1) (EC 2.8.2.1) (Sulfotransferase 1B2) (Sulfotransferase family cytosolic 1B member 1) (Thyroid hormone sulfotransferase);Sulfotransferase 1C2A (ST1C2A) (rSULT1C2A) (EC 2.8.2.1) (Sulfotransferase K2);Sulfotransferase 1C4 (ST1C4) (EC 2.8.2.1) (Sulfotransferase 1C2) (SULT1C#2)
##  5:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           Sulfotransferase 1B1 (ST1B1) (EC 2.8.2.1) (Sulfotransferase 1B2) (Sulfotransferase family cytosolic 1B member 1) (Thyroid hormone sulfotransferase);Sulfotransferase 1C4 (ST1C4) (EC 2.8.2.1);Sulfotransferase 1B1 (ST1B1) (EC 2.8.2.1) (Sulfotransferase 1B2) (Sulfotransferase family cytosolic 1B member 1) (Thyroid hormone sulfotransferase);Bifunctional 3'-phosphoadenosine 5'-phosphosulfate synthase (PAPS synthase) (PAPS synthetase) (PAPSS) (Sulfurylase kinase) (SK) [Includes: Sulfate adenylyltransferase (EC 2.7.7.4) (ATP-sulfurylase) (Sulfate adenylate transferase) (SAT); Adenylyl-sulfate kinase (EC 2.7.1.25) (3'-phosphoadenosine-5'-phosphosulfate synthase) (APS kinase) (Adenosine-5'-phosphosulfate 3'-phosphotransferase) (Adenylylsulfate 3'-phosphotransferase)];Sulfotransferase 1C4 (ST1C4) (EC 2.8.2.1);Sulfotransferase 1C4 (ST1C4) (EC 2.8.2.1) (Sulfotransferase 1C2) (SULT1C#2)
##  6:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           CTP synthase 1 (EC 6.3.4.2) (CTP synthetase 1) (UTP--ammonia ligase 1);Ectonucleoside triphosphate diphosphohydrolase 4 (NTPDase 4) (EC 3.6.1.15) (EC 3.6.1.6) (Golgi UDPase) (Lysosomal apyrase-like protein of 70 kDa) (Uridine-diphosphatase) (UDPase) (EC 3.6.1.42);Nucleoside diphosphate kinase homolog 5 (3'-5' exonuclease NME5) (EC 3.1.-.-);GTP:AMP phosphotransferase AK3, mitochondrial (EC 2.7.4.10) (Adenylate kinase 3) (Adenylate kinase 3 alpha-like 1) (Adenylate kinase isozyme 3)
##  7: Cytoplasmic polyadenylation element-binding protein 2 (CPE-BP2) (CPE-binding protein 2) (mCPEB-2);Mitochondrial assembly of ribosomal large subunit protein 1;E3 ubiquitin-protein ligase TRIM71 (EC 2.3.2.27) (Protein lin-41 homolog) (RING-type E3 ubiquitin transferase TRIM71) (Tripartite motif-containing protein 71);E3 ubiquitin-protein ligase TRIM71 (EC 2.3.2.27) (Protein lin-41 homolog) (RING-type E3 ubiquitin transferase TRIM71) (Tripartite motif-containing protein 71);Tripartite motif-containing protein 3 (EC 2.3.2.27) (Brain-expressed RING finger protein) (RING finger protein 22) (RING finger protein 97);Tripartite motif-containing protein 2 (EC 2.3.2.27) (E3 ubiquitin-protein ligase TRIM2) (RING-type E3 ubiquitin transferase TRIM2);Tripartite motif-containing protein 2 (EC 2.3.2.27) (E3 ubiquitin-protein ligase TRIM2) (RING-type E3 ubiquitin transferase TRIM2);E3 ubiquitin-protein ligase TRIM71 (EC 2.3.2.27) (Protein lin-41 homolog) (RING-type E3 ubiquitin transferase TRIM71) (Tripartite motif-containing protein 71);E3 ubiquitin-protein ligase TRIM71 (EC 2.3.2.27) (Protein lin-41 homolog) (RING-type E3 ubiquitin transferase TRIM71) (Tripartite motif-containing protein 71);E3 ubiquitin-protein ligase TRIM71 (EC 2.3.2.27) (Protein lin-41 homolog) (RING-type E3 ubiquitin transferase TRIM71) (Tripartite motif-containing protein 71);E3 ubiquitin-protein ligase TRIM71 (EC 2.3.2.27) (Protein lin-41 homolog) (RING-type E3 ubiquitin transferase TRIM71) (Tripartite motif-containing protein 71);Tripartite motif-containing protein 3 (EC 2.3.2.27) (Brain-expressed RING finger protein) (RING finger protein 22);Tripartite motif-containing protein 3 (EC 2.3.2.27) (Brain-expressed RING finger protein) (RING finger protein 22)
##  8:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 Protein arginine N-methyltransferase 3 (EC 2.1.1.319) (Heterogeneous nuclear ribonucleoprotein methyltransferase-like protein 3);Serine/threonine-protein kinase CTR1 (EC 2.7.11.1) (Protein CONSTITUTIVE TRIPLE RESPONSE1);Methyltransferase flvH (EC 2.1.1.-) (Flavunoidine biosynthesis cluster protein H);Methyltransferase flvH (EC 2.1.1.-) (Flavunoidine biosynthesis cluster protein H)
##  9:                                                                                  Phenylalanine-4-hydroxylase (PAH) (EC 1.14.16.1) (Phe-4-monooxygenase);Cystathionine gamma-lyase (CGL) (CSE) (EC 4.4.1.1) (Cysteine desulfhydrase) (Cysteine-protein sulfhydrase) (Gamma-cystathionase) (Homocysteine desulfhydrase) (EC 4.4.1.2);L-threonine ammonia-lyase (EC 4.3.1.19) (L-serine ammonia-lyase) (EC 4.3.1.17) (Threonine dehydratase);Glutathione hydrolase 1 proenzyme (EC 3.4.19.13) (Gamma-glutamyltransferase 1) (Gamma-glutamyltranspeptidase 1) (GGT 1) (EC 2.3.2.2) (Leukotriene-C4 hydrolase) (EC 3.4.19.14) (CD antigen CD224) [Cleaved into: Glutathione hydrolase 1 heavy chain; Glutathione hydrolase 1 light chain];Methylenetetrahydrofolate reductase (NADPH) (EC 1.5.1.53);Acireductone dioxygenase (Acireductone dioxygenase (Fe(2+)-requiring)) (ARD') (Fe-ARD) (EC 1.13.11.54) (Acireductone dioxygenase (Ni(2+)-requiring)) (ARD) (Ni-ARD) (EC 1.13.11.53);Plasma membrane calcium-transporting ATPase 4 (PMCA4) (EC 7.2.2.10) (Matrix-remodeling-associated protein 1) (Plasma membrane calcium ATPase isoform 4) (Plasma membrane calcium pump isoform 4);Protein TabA;Branched-chain-amino-acid aminotransferase (EC 2.6.1.42);Delta-1-pyrroline-5-carboxylate synthase (P5CS) (Aldehyde dehydrogenase family 18 member A1) [Includes: Glutamate 5-kinase (GK) (EC 2.7.2.11) (Gamma-glutamyl kinase); Gamma-glutamyl phosphate reductase (GPR) (EC 1.2.1.41) (Glutamate-5-semialdehyde dehydrogenase) (Glutamyl-gamma-semialdehyde dehydrogenase)];Asparagine synthetase domain-containing protein 1;Glutamate dehydrogenase (GDH) (EC 1.4.1.3) (NAD(P)H-utilizing glutamate dehydrogenase);Phosphoserine phosphatase (PSP) (PSPase) (EC 3.1.3.3) (O-phosphoserine phosphohydrolase)
## 10:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        Creatine kinase, testis isozyme (EC 2.7.3.2);Creatine kinase M-type (EC 2.7.3.2) (Creatine kinase M chain) (Creatine phosphokinase M-type) (CPK-M) (M-CK);Creatine kinase M-type (EC 2.7.3.2) (Creatine kinase M chain) (Creatine phosphokinase M-type) (CPK-M) (M-CK);Arginine kinase (AK) (EC 2.7.3.3)
BP_go_terms <- BP_Oral@graph@nodes
genes_by_GO <- genesInTerm(BP_Oral, BP_go_terms)


cluster_8 <- Oral_enriched_clustered %>% dplyr::filter(GO.cluster == 8) %>% pull(GO.ID)
cluster_8_9 <- c(cluster_8, "GO:0050982")
  
cluster_8_9_genes <- unique(unlist(genes_by_GO[cluster_8_9]))

write.csv(cluster_8_9_genes,"../output_RNA/differential_expression/semantic-enrichment/cluster_gene_lists/Oral_enrich_BP_Sensory.csv")

DE_05_SwissProt %>% filter(query %in%unique(unlist(genes_by_GO[Oral_enriched_clustered %>% pull(GO.ID)]))) %>% View()

Aboral Epidermis:

create topGO objects and perform enrichment using topGO wrapped by ViSEAGO

# create viseago object
selection <- names(selection_Aboral)[selection_Aboral==1]
background <- names(expressed)

BP_Aboral <- ViSEAGO::create_topGOdata(
    geneSel=selection,
    allGenes=background,
    gene2GO=myGENE2GO_Pacuta, 
    ont="BP",
    nodeSize=5
)
## 
## Building most specific GOs .....
##  ( 8797 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 12496 GO terms and 27095 relations. )
## 
## Annotating nodes ...............
##  ( 9496 genes annotated to the GO terms. )
# perform TopGO test using elim algorithm
elim_Aboral <- topGO::runTest(
    BP_Aboral,
    algorithm ="elim",
    statistic = "fisher"
)
## 
##           -- Elim Algorithm -- 
## 
##       the algorithm is scoring 3092 nontrivial nodes
##       parameters: 
##           test statistic: fisher
##           cutOff: 0.01
## 
##   Level 17:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  6 nodes to be scored    (0 eliminated genes)
## 
##   Level 15:  21 nodes to be scored   (0 eliminated genes)
## 
##   Level 14:  24 nodes to be scored   (0 eliminated genes)
## 
##   Level 13:  44 nodes to be scored   (175 eliminated genes)
## 
##   Level 12:  85 nodes to be scored   (236 eliminated genes)
## 
##   Level 11:  184 nodes to be scored  (238 eliminated genes)
## 
##   Level 10:  284 nodes to be scored  (291 eliminated genes)
## 
##   Level 9:   377 nodes to be scored  (334 eliminated genes)
## 
##   Level 8:   450 nodes to be scored  (625 eliminated genes)
## 
##   Level 7:   472 nodes to be scored  (816 eliminated genes)
## 
##   Level 6:   489 nodes to be scored  (1092 eliminated genes)
## 
##   Level 5:   366 nodes to be scored  (1921 eliminated genes)
## 
##   Level 4:   194 nodes to be scored  (2391 eliminated genes)
## 
##   Level 3:   78 nodes to be scored   (2465 eliminated genes)
## 
##   Level 2:   16 nodes to be scored   (2583 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (2583 eliminated genes)
BP_Results <- ViSEAGO::merge_enrich_terms(
    Input = list(Aboral_elim = c("BP_Aboral", "elim_Aboral"))
)
## 'select()' returned 1:1 mapping between keys and columns

Visualize and save initial results

# display the merged table
BP_Results
## - object class: enrich_GO_terms
## - ontology: BP
## - method: topGO
## - summary:
##  Aboral_elim
##       BP_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 376
##         feasible_genes: 9496
##         feasible_genes_significant: 332
##         genes_nodeSize: 5
##         nodes_number: 6675
##         edges_number: 14047
##       elim_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 6675
##         GO_significant: 118
##         feasible_genes: 9496
##         feasible_genes_significant: 332
##         genes_nodeSize: 5
##         Nontrivial_nodes: 3092 
##  - enrichment pvalue cutoff:
##         Aboral_elim : 0.01
## - enrich GOs (in at least one list): 118 GO terms of 1 conditions.
##         Aboral_elim : 118 terms
ViSEAGO::show_table(BP_Results)
# print the merged table in a file
ViSEAGO::show_table(
    BP_Results,
    "../output_RNA/differential_expression/semantic-enrichment/DE_05_Aboral.tsv"
)
  • enrichment pvalue cutoff: Aboral_elim : 0.01
  • enrich GOs (in at least one list): Aboral_elim : 118 terms

Semantic similarity –> Cluster: Use Wang graph-based method to identify similarity between GO terms

# initialize 
myGOs<-ViSEAGO::build_GO_SS(
    gene2GO=myGENE2GO_Pacuta,
    enrich_GO_terms=BP_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
    myGOs,
   distance=c("Wang")
)

myGOs
## - object class: GO_SS
## - ontology: BP
## - method: topGO
## - summary:
## Aboral_elim
##       BP_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 376
##         feasible_genes: 9496
##         feasible_genes_significant: 332
##         genes_nodeSize: 5
##         nodes_number: 6675
##         edges_number: 14047
##       elim_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 6675
##         GO_significant: 118
##         feasible_genes: 9496
##         feasible_genes_significant: 332
##         genes_nodeSize: 5
##         Nontrivial_nodes: 3092 
##  - enrichment pvalue cutoff:
##         Aboral_elim : 0.01
## - enrich GOs (in at least one list): 118 GO terms of 1 conditions.
##         Aboral_elim : 118 terms
## - terms distances:  Wang

Multi Dimensional Scaling Plot of the GO-terms based on semantic similarity

# display MDSplot
ViSEAGO::MDSplot(myGOs,
                 "GOterms")

Heatmap + table of GO terms, with clusters aggregated by hclust with ward.D2 method

# GOterms heatmap with the default parameters
Wang_clusters_wardD2_Aboral <- ViSEAGO::GOterms_heatmap(
  myGOs,
  showIC = TRUE,
  showGOlabels = TRUE,
  GO.tree = list(
    tree = list(distance = "Wang", aggreg.method = "ward.D2"),
    cut = list(
      dynamic = list(
        pamStage = TRUE,
        pamRespectsDendro = TRUE,
        deepSplit = 1,
        minClusterSize = 2
      )
    )
  ),
  samples.tree = NULL
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2_Aboral,
    "GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2_Aboral)
# Print the clusters-heatmap table
ViSEAGO::show_table(
    Wang_clusters_wardD2_Aboral,
    "../output_RNA/differential_expression/semantic-enrichment/DE_05_Aboral_cluster_heatmap_Wang_wardD2.tsv"
)

ViSEAGO::show_heatmap(
    Wang_clusters_wardD2_Aboral,
    height=1500,
    width=700,
    "GOterms", file="../output_RNA/differential_expression/semantic-enrichment/DE_05_Aboral_cluster_heatmap_Wang_wardD2.png")
## quartz_off_screen 
##                 2

Multi Dimensional Scaling Plot of the enriched GO-terms based on semantic similarity, colored by cluster

# display colored MDSplot
ViSEAGO::MDSplot(
    Wang_clusters_wardD2_Aboral,
    "GOterms")

Further summarizing the Semantic Similarity-based clusters

# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2_Aboral<-ViSEAGO::compute_SS_distances(
    Wang_clusters_wardD2_Aboral,
    distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned many:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
    Wang_clusters_wardD2_Aboral,
    "GOclusters")
# GOclusters heatmap
Wang_clusters_wardD2_Aboral<-ViSEAGO::GOclusters_heatmap(
    Wang_clusters_wardD2_Aboral,
    tree=list(
        distance="BMA",
        aggreg.method="ward.D2"
    )
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2_Aboral,
    "GOclusters")
Aboral_enriched_clustered <- Wang_clusters_wardD2_Aboral@enrich_GOs@data

top_cluster <- Aboral_enriched_clustered %>% arrange(Aboral_elim.pvalue) %>% head(1) %>% pull(GO.cluster)

Aboral_enriched_clustered %>% dplyr::filter(GO.cluster == top_cluster)
##    GO.cluster    IC      GO.ID
##        <char> <num>     <char>
## 1:          9  6.30 GO:0007160
## 2:          9  4.14 GO:0007155
## 3:          9  8.23 GO:0016339
## 4:          9  6.32 GO:0007156
## 5:          9  7.39 GO:0007157
##                                                                                term
##                                                                              <char>
## 1:                                                             cell-matrix adhesion
## 2:                                                                    cell adhesion
## 3: calcium-dependent cell-cell adhesion via plasma membrane cell adhesion molecules
## 4:                  homophilic cell adhesion via plasma membrane adhesion molecules
## 5:      heterophilic cell-cell adhesion via plasma membrane cell adhesion molecules
##                                                                                                                                       definition
##                                                                                                                                           <char>
## 1:                                                                     The binding of a cell to the extracellular matrix via adhesion molecules.
## 2: The attachment of a cell, either to another cell or to an underlying substrate such as the extracellular matrix, via cell adhesion molecules.
## 3:                   The attachment of one cell to another cell via adhesion molecules that require the presence of calcium for the interaction.
## 4:                               The attachment of a plasma membrane adhesion molecule in one cell to an identical molecule in an adjacent cell.
## 5:                                   The attachment of an adhesion molecule in one cell to a nonidentical adhesion molecule in an adjacent cell.
##    Aboral_elim.genes_frequency Aboral_elim.pvalue Aboral_elim.-log10_pvalue
##                         <char>              <num>                     <num>
## 1:              8.738% (9/103)       9.829966e-03                      2.01
## 2:            10.423% (69/662)       3.718063e-03                      2.43
## 3:              29.412% (5/17)       2.216604e-04                      3.65
## 4:            20.175% (23/114)       6.027593e-12                     11.22
## 5:             30.769% (12/39)       4.576971e-09                      8.34
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 Aboral_elim.Significant_genes
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <char>
## 1:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        Pocillopora_acuta_HIv2___RNAseq.g11876.t1;Pocillopora_acuta_HIv2___RNAseq.g12272.t1;Pocillopora_acuta_HIv2___RNAseq.g12278.t1;Pocillopora_acuta_HIv2___RNAseq.g13823.t1;Pocillopora_acuta_HIv2___RNAseq.g25351.t1;Pocillopora_acuta_HIv2___RNAseq.g28502.t1;Pocillopora_acuta_HIv2___RNAseq.g5342.t1;Pocillopora_acuta_HIv2___RNAseq.g7895.t1;Pocillopora_acuta_HIv2___TS.g10825.t1
## 2: Pocillopora_acuta_HIv2___RNAseq.2533_t;Pocillopora_acuta_HIv2___RNAseq.6832_t;Pocillopora_acuta_HIv2___RNAseq.g10343.t1;Pocillopora_acuta_HIv2___RNAseq.g10385.t1;Pocillopora_acuta_HIv2___RNAseq.g1119.t1;Pocillopora_acuta_HIv2___RNAseq.g11876.t1;Pocillopora_acuta_HIv2___RNAseq.g12258.t1;Pocillopora_acuta_HIv2___RNAseq.g12272.t1;Pocillopora_acuta_HIv2___RNAseq.g12278.t1;Pocillopora_acuta_HIv2___RNAseq.g12281.t1;Pocillopora_acuta_HIv2___RNAseq.g12374.t1;Pocillopora_acuta_HIv2___RNAseq.g12987.t1;Pocillopora_acuta_HIv2___RNAseq.g13823.t1;Pocillopora_acuta_HIv2___RNAseq.g14253.t1;Pocillopora_acuta_HIv2___RNAseq.g14347.t1;Pocillopora_acuta_HIv2___RNAseq.g14375.t1;Pocillopora_acuta_HIv2___RNAseq.g16484.t1;Pocillopora_acuta_HIv2___RNAseq.g17517.t1;Pocillopora_acuta_HIv2___RNAseq.g17519.t1;Pocillopora_acuta_HIv2___RNAseq.g18125.t1;Pocillopora_acuta_HIv2___RNAseq.g18385.t1;Pocillopora_acuta_HIv2___RNAseq.g19085.t1;Pocillopora_acuta_HIv2___RNAseq.g20876.t1;Pocillopora_acuta_HIv2___RNAseq.g21477.t1;Pocillopora_acuta_HIv2___RNAseq.g21479.t1;Pocillopora_acuta_HIv2___RNAseq.g21481.t1;Pocillopora_acuta_HIv2___RNAseq.g23178.t1;Pocillopora_acuta_HIv2___RNAseq.g23210.t1;Pocillopora_acuta_HIv2___RNAseq.g23886.t1;Pocillopora_acuta_HIv2___RNAseq.g24688.t1;Pocillopora_acuta_HIv2___RNAseq.g25351.t1;Pocillopora_acuta_HIv2___RNAseq.g25739.t1;Pocillopora_acuta_HIv2___RNAseq.g25761.t1;Pocillopora_acuta_HIv2___RNAseq.g27031.t1;Pocillopora_acuta_HIv2___RNAseq.g28109.t1;Pocillopora_acuta_HIv2___RNAseq.g28196.t1;Pocillopora_acuta_HIv2___RNAseq.g28226.t2;Pocillopora_acuta_HIv2___RNAseq.g28502.t1;Pocillopora_acuta_HIv2___RNAseq.g28971.t1a;Pocillopora_acuta_HIv2___RNAseq.g29143.t2;Pocillopora_acuta_HIv2___RNAseq.g29795.t1;Pocillopora_acuta_HIv2___RNAseq.g3826.t1;Pocillopora_acuta_HIv2___RNAseq.g3939.t1;Pocillopora_acuta_HIv2___RNAseq.g5181.t1;Pocillopora_acuta_HIv2___RNAseq.g5342.t1;Pocillopora_acuta_HIv2___RNAseq.g5388.t1;Pocillopora_acuta_HIv2___RNAseq.g5896.t1;Pocillopora_acuta_HIv2___RNAseq.g682.t1;Pocillopora_acuta_HIv2___RNAseq.g7895.t1;Pocillopora_acuta_HIv2___RNAseq.g805.t1;Pocillopora_acuta_HIv2___RNAseq.g8148.t1;Pocillopora_acuta_HIv2___RNAseq.g9049.t1;Pocillopora_acuta_HIv2___RNAseq.g9253.t1;Pocillopora_acuta_HIv2___TS.g10825.t1;Pocillopora_acuta_HIv2___TS.g11002.t4;Pocillopora_acuta_HIv2___TS.g12860.t1;Pocillopora_acuta_HIv2___TS.g14528.t1a;Pocillopora_acuta_HIv2___TS.g15590.t1;Pocillopora_acuta_HIv2___TS.g17199.t1;Pocillopora_acuta_HIv2___TS.g23602.t1;Pocillopora_acuta_HIv2___TS.g24030.t1;Pocillopora_acuta_HIv2___TS.g24743.t1;Pocillopora_acuta_HIv2___TS.g30706.t1;Pocillopora_acuta_HIv2___TS.g30914.t1;Pocillopora_acuta_HIv2___TS.g3265.t1;Pocillopora_acuta_HIv2___TS.g5115.t2;Pocillopora_acuta_HIv2___TS.g5308.t2;Pocillopora_acuta_HIv2___TS.g8258.t1;Pocillopora_acuta_HIv2___TS.g8259.t1b
## 3:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   Pocillopora_acuta_HIv2___RNAseq.g16484.t1;Pocillopora_acuta_HIv2___RNAseq.g17517.t1;Pocillopora_acuta_HIv2___RNAseq.g17519.t1;Pocillopora_acuta_HIv2___TS.g30706.t1;Pocillopora_acuta_HIv2___TS.g5308.t2
## 4:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              Pocillopora_acuta_HIv2___RNAseq.2533_t;Pocillopora_acuta_HIv2___RNAseq.g10343.t1;Pocillopora_acuta_HIv2___RNAseq.g12987.t1;Pocillopora_acuta_HIv2___RNAseq.g14253.t1;Pocillopora_acuta_HIv2___RNAseq.g16484.t1;Pocillopora_acuta_HIv2___RNAseq.g17517.t1;Pocillopora_acuta_HIv2___RNAseq.g17519.t1;Pocillopora_acuta_HIv2___RNAseq.g19085.t1;Pocillopora_acuta_HIv2___RNAseq.g23178.t1;Pocillopora_acuta_HIv2___RNAseq.g23210.t1;Pocillopora_acuta_HIv2___RNAseq.g29143.t2;Pocillopora_acuta_HIv2___RNAseq.g5181.t1;Pocillopora_acuta_HIv2___RNAseq.g5388.t1;Pocillopora_acuta_HIv2___RNAseq.g682.t1;Pocillopora_acuta_HIv2___RNAseq.g805.t1;Pocillopora_acuta_HIv2___RNAseq.g8148.t1;Pocillopora_acuta_HIv2___TS.g15590.t1;Pocillopora_acuta_HIv2___TS.g23602.t1;Pocillopora_acuta_HIv2___TS.g24743.t1;Pocillopora_acuta_HIv2___TS.g30706.t1;Pocillopora_acuta_HIv2___TS.g5308.t2;Pocillopora_acuta_HIv2___TS.g8258.t1;Pocillopora_acuta_HIv2___TS.g8259.t1b
## 5:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              Pocillopora_acuta_HIv2___RNAseq.2533_t;Pocillopora_acuta_HIv2___RNAseq.g12987.t1;Pocillopora_acuta_HIv2___RNAseq.g14253.t1;Pocillopora_acuta_HIv2___RNAseq.g28971.t1a;Pocillopora_acuta_HIv2___RNAseq.g5388.t1;Pocillopora_acuta_HIv2___RNAseq.g8148.t1;Pocillopora_acuta_HIv2___RNAseq.g9253.t1;Pocillopora_acuta_HIv2___TS.g15590.t1;Pocillopora_acuta_HIv2___TS.g23602.t1;Pocillopora_acuta_HIv2___TS.g30706.t1;Pocillopora_acuta_HIv2___TS.g30914.t1;Pocillopora_acuta_HIv2___TS.g3265.t1
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  Aboral_elim.Significant_genes_symbol
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                <char>
## 1:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      Mucin-like protein;Receptor-type tyrosine-protein phosphatase alpha (Protein-tyrosine phosphatase alpha) (R-PTP-alpha) (EC 3.1.3.48) (LCA-related phosphatase) (PTPTY-28);Receptor-type tyrosine-protein phosphatase alpha (Protein-tyrosine phosphatase alpha) (R-PTP-alpha) (EC 3.1.3.48) (LCA-related phosphatase) (PTPTY-28);Mucin-like protein;Mammalian ependymin-related protein 1 (MERP-1);Mucin-like protein;Protein jagged-1 (Jagged1) (hJ1) (CD antigen CD339);Protein jagged-1 (Jagged1) (CD antigen CD339);Thrombospondin type-1 domain-containing protein 1 (Transmembrane molecule with thrombospondin module)
## 2: Hemicentin-1 (Fibulin-6) (FIBL-6);Protein turtle homolog B (Immunoglobulin superfamily member 9B) (IgSF9B);Protocadherin Fat 3 (FAT tumor suppressor homolog 3);Fibroblast growth factor receptor-like 1 (FGF receptor-like protein 1) (FGF homologous factor receptor) (FGFR-like protein) (Fibroblast growth factor receptor 5) (FGFR-5);Collagen alpha-6(VI) chain;Mucin-like protein;Receptor-type tyrosine-protein phosphatase S (R-PTP-S) (EC 3.1.3.48) (Chick receptor tyrosine phosphatase alpha) (CRYP alpha) (CRYPalpha);Receptor-type tyrosine-protein phosphatase alpha (Protein-tyrosine phosphatase alpha) (R-PTP-alpha) (EC 3.1.3.48) (LCA-related phosphatase) (PTPTY-28);Receptor-type tyrosine-protein phosphatase alpha (Protein-tyrosine phosphatase alpha) (R-PTP-alpha) (EC 3.1.3.48) (LCA-related phosphatase) (PTPTY-28);Receptor-type tyrosine-protein phosphatase F (EC 3.1.3.48) (Leukocyte common antigen related) (LAR);Tissue inhibitor of metalloproteinase;Protocadherin Fat 4 (hFat4) (Cadherin family member 14) (FAT tumor suppressor homolog 4) (Fat-like cadherin protein FAT-J);Mucin-like protein;Hemicentin-1 (Fibulin-6) (FIBL-6);Collagen alpha-4(VI) chain;Collagen alpha-5(VI) chain (Collagen alpha-1(XXIX) chain);Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Melanotransferrin (Membrane-bound transferrin-like protein p97) (MTf) (CD antigen CD228);BAG family molecular chaperone regulator 4 (BAG-4) (Bcl-2-associated athanogene 4) (Silencer of death domains);Halomucin;Melanotransferrin (Membrane-bound transferrin-like protein p97) (MTf) (CD antigen CD228);Tenascin (TN) (Cytotactin) (GMEM) (GP 150-225) (Glioma-associated-extracellular matrix antigen) (Hexabrachion) (JI) (Myotendinous antigen) (Neuronectin) (Tenascin-C) (TN-C);Tenascin (TN) (Cytotactin) (GMEM) (GP 150-225) (Glioma-associated-extracellular matrix antigen) (Hexabrachion) (JI) (Myotendinous antigen) (Neuronectin) (Tenascin-C) (TN-C);Tenascin (TN) (Cytotactin) (GMEM) (GP 150-225) (Glioma-associated-extracellular matrix antigen) (Hexabrachion) (JI) (Myotendinous antigen) (Neuronectin) (Tenascin-C) (TN-C);Hemicentin-2;Hemicentin-2;Laminin subunit beta-1 (Laminin B1 chain) (Laminin-1 subunit beta) (Laminin-10 subunit beta) (Laminin-12 subunit beta) (Laminin-2 subunit beta) (Laminin-6 subunit beta) (Laminin-8 subunit beta);Neurexin-1 (Neurexin I-alpha) (Neurexin-1-alpha);Mammalian ependymin-related protein 1 (MERP-1);Ephrin-B2 (ELF-2) (EPH-related receptor tyrosine kinase ligand 5) (LERK-5) (HTK ligand) (HTK-L);Collagen alpha-6(VI) chain;Dystonin (Bullous pemphigoid antigen 1) (BPA) (Dystonia musculorum protein) (Hemidesmosomal plaque protein) (Microtubule actin cross-linking factor 2);Fibulin-2 (FIBL-2);Aggrecan core protein (Cartilage-specific proteoglycan core protein) (CSPCP) (Chondroitin sulfate proteoglycan core protein 1) (Chondroitin sulfate proteoglycan 1) [Cleaved into: Aggrecan core protein 2];Neurogenic locus Notch protein [Cleaved into: Processed neurogenic locus Notch protein];Mucin-like protein;Uromodulin (Tamm-Horsfall urinary glycoprotein) (THP) [Cleaved into: Uromodulin, secreted form];Roundabout homolog 2;Zonadhesin;Testican-2 (SPARC/osteonectin, CWCV, and Kazal-like domains proteoglycan 2);Spondin-1 (F-spondin);Roundabout homolog 2;Protein jagged-1 (Jagged1) (hJ1) (CD antigen CD339);Hemicentin-1 (Fibulin-6) (FIBL-6);Probable N-acetyltransferase camello (EC 2.3.1.-) (Xcml);Roundabout homolog 1;Protein jagged-1 (Jagged1) (CD antigen CD339);Protein turtle homolog B (Immunoglobulin superfamily member 9B);Hemicentin-1 (Fibulin-6) (FIBL-6);Allograft inflammatory factor 1 (AIF-1) (Ionized calcium-binding adapter molecule 1) (Protein G1);Receptor-type tyrosine-protein phosphatase delta (Protein-tyrosine phosphatase delta) (R-PTP-delta) (EC 3.1.3.48);Thrombospondin type-1 domain-containing protein 1 (Transmembrane molecule with thrombospondin module);Neurogenic locus Notch protein [Cleaved into: Processed neurogenic locus Notch protein];Collagen alpha-1(VI) chain;Peroxidasin homolog (EC 1.11.2.-) (Melanoma-associated antigen MG50) (Peroxidasin 1) (hsPxd01) (Vascular peroxidase 1) (p53-responsive gene 2 protein) [Cleaved into: PXDN active fragment];Hemicentin-1 (Fibulin-6) (FIBL-6);Laminin subunit alpha-1 (Laminin A chain) (Laminin-1 subunit alpha) (Laminin-3 subunit alpha) (S-laminin subunit alpha) (S-LAM alpha);Protocadherin Fat 4 (FAT tumor suppressor homolog 4) (Fat-like cadherin protein FAT-J);Neural cell adhesion molecule 2 (N-CAM-2) (NCAM-2) (Neural cell adhesion molecule RB-8) (R4B12);Hemicentin-2;Cadherin-related tumor suppressor (Protein fat) [Cleaved into: Ft-mito];Receptor-type tyrosine-protein phosphatase delta (Protein-tyrosine phosphatase delta) (R-PTP-delta) (EC 3.1.3.48);Receptor-type tyrosine-protein phosphatase delta (Protein-tyrosine phosphatase delta) (R-PTP-delta) (EC 3.1.3.48);Aggrecan core protein (Cartilage-specific proteoglycan core protein) (CSPCP);Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Hemicentin-2;Neural cell adhesion molecule 1 (N-CAM-1) (NCAM-1)
## 3:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Cadherin-related tumor suppressor (Protein fat) [Cleaved into: Ft-mito];Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14)
## 4:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           Hemicentin-1 (Fibulin-6) (FIBL-6);Protocadherin Fat 3 (FAT tumor suppressor homolog 3);Protocadherin Fat 4 (hFat4) (Cadherin family member 14) (FAT tumor suppressor homolog 4) (Fat-like cadherin protein FAT-J);Hemicentin-1 (Fibulin-6) (FIBL-6);Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Halomucin;Hemicentin-2;Hemicentin-2;Roundabout homolog 2;Roundabout homolog 2;Hemicentin-1 (Fibulin-6) (FIBL-6);Roundabout homolog 1;Protein turtle homolog B (Immunoglobulin superfamily member 9B);Hemicentin-1 (Fibulin-6) (FIBL-6);Hemicentin-1 (Fibulin-6) (FIBL-6);Protocadherin Fat 4 (FAT tumor suppressor homolog 4) (Fat-like cadherin protein FAT-J);Hemicentin-2;Cadherin-related tumor suppressor (Protein fat) [Cleaved into: Ft-mito];Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Hemicentin-2;Neural cell adhesion molecule 1 (N-CAM-1) (NCAM-1)
## 5:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          Hemicentin-1 (Fibulin-6) (FIBL-6);Protocadherin Fat 4 (hFat4) (Cadherin family member 14) (FAT tumor suppressor homolog 4) (Fat-like cadherin protein FAT-J);Hemicentin-1 (Fibulin-6) (FIBL-6);Uromodulin (Tamm-Horsfall urinary glycoprotein) (THP) [Cleaved into: Uromodulin, secreted form];Hemicentin-1 (Fibulin-6) (FIBL-6);Hemicentin-1 (Fibulin-6) (FIBL-6);Receptor-type tyrosine-protein phosphatase delta (Protein-tyrosine phosphatase delta) (R-PTP-delta) (EC 3.1.3.48);Hemicentin-1 (Fibulin-6) (FIBL-6);Protocadherin Fat 4 (FAT tumor suppressor homolog 4) (Fat-like cadherin protein FAT-J);Cadherin-related tumor suppressor (Protein fat) [Cleaved into: Ft-mito];Receptor-type tyrosine-protein phosphatase delta (Protein-tyrosine phosphatase delta) (R-PTP-delta) (EC 3.1.3.48);Receptor-type tyrosine-protein phosphatase delta (Protein-tyrosine phosphatase delta) (R-PTP-delta) (EC 3.1.3.48)
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2_Aboral,
    "GOterms")

Plotting of both tissues, pre-clustered by tissue

Custom plotting of results

clustered_Oral_DEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2_Oral@enrich_GOs@data) 

cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2_Oral,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>% 
  mutate(
    cluster = str_extract(cluster_full, "\\d+"),
    Cluster.name = str_replace(cluster_full,".*?_.*?_",""),
    Cluster.term = str_replace(cluster_full,"^\\d+_",""),
    Cluster.term = str_replace(Cluster.term,"_.*$","")
  ) %>%
  mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) %>%
  dplyr::select(cluster, Cluster.name)

clustered_Oral_DEGs_enrichedGO <- clustered_Oral_DEGs_enrichedGO %>%
  left_join(cluster_map, by = c("GO.cluster" = "cluster")) %>%
  mutate(Tissue="Oral")

clustered_Aboral_DEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2_Aboral@enrich_GOs@data)

cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2_Aboral,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>% 
  mutate(
    cluster = str_extract(cluster_full, "\\d+"),
    Cluster.name = str_replace(cluster_full,".*?_.*?_",""),
    Cluster.term = str_replace(cluster_full,"^\\d+_",""),
    Cluster.term = str_replace(Cluster.term,"_.*$","")
  ) %>%
  mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) %>%
  dplyr::select(cluster, Cluster.name)
clustered_Aboral_DEGs_enrichedGO <- clustered_Aboral_DEGs_enrichedGO %>%
  left_join(cluster_map, by = c("GO.cluster" = "cluster")) %>%
  mutate(Tissue="Aboral")


clustered_allDEGs_enrichedGO <- rbind(clustered_Oral_DEGs_enrichedGO %>%
                                        dplyr::select(-contains("Oral")),
                                      clustered_Aboral_DEGs_enrichedGO %>%
                                        dplyr::select(-contains("Aboral")))

NEW: Plotting of each tissue cluster separately

extract cluster trees

library(ggdendro)
library(patchwork)
dist_mat <- slot(Wang_clusters_wardD2_Oral, "clusters_dist")[["BMA"]]
hc <- hclust(dist_mat, method = "ward.D2")
dend <- as.dendrogram(hc)

dend_data <- ggdendro::dendro_data(dend, type = "rectangle")

dend_labels <- dend_data$labels %>%
    mutate(
    cluster = str_extract(label, "\\d+"),
    Cluster.name = str_replace(label,".*?_.*?_",""),
    Cluster.term = str_replace(label,"^\\d+_",""),
    Cluster.term = str_replace(Cluster.term,"_.*$","")
  ) %>%
  mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) 

bar_plot <- clustered_allDEGs_enrichedGO %>%  
filter(Tissue == "Oral") %>%
  arrange(as.numeric(GO.cluster)) %>%
  mutate(Cluster_label = factor(Cluster.name, levels = rev(dend_labels$Cluster.name))) %>%
  ggplot(aes(x = Cluster_label)) +
  stat_count(width = .8, fill="#FD8D3C", alpha = 0.8) +
  coord_flip() +
  theme_classic(base_size = 9) +
  scale_y_continuous(
    breaks = scales::breaks_width(2),
    expand = c(0, 0),
    limits = c(0, NA)
  ) + 
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
    axis.title.y = element_blank(),
    axis.text.y = element_text(size = 7, lineheight = 0.7,color = "black"),
    strip.text = element_text(face = "bold", size = 9),
    strip.background = element_rect(fill = "white", color = "black"),
    legend.position = "none",
    plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
    panel.spacing.y = unit(0.2, "lines")
  ) + labs(subtitle = "Oral epidermis", y = "Terms in Cluster")

dend_plot <- ggplot() +
  geom_segment(
    data = dend_data$segments,
    aes(x = y, y = x, xend = yend, yend = xend),
    linewidth = 0.6,
    color = "#2C3E50"
  ) +
  scale_y_reverse(
    breaks = seq_along(dend_labels$Cluster.name),
    expand = c(0.02, 0.02)
  ) +
  scale_x_reverse(expand = c(0.02, 0)) +
  theme_void() +
  theme(
    plot.margin = margin(t = 30, r = 0, b = 22, l = 5)
  )

# Combine plots with better proportions
final <- dend_plot + bar_plot +
  plot_layout(widths = c(0.8, 3.5))

Wang_clusters_wardD2_Oral@heatmap$GOclusters_static

ggsave("../output_RNA/differential_expression/semantic-enrichment/BP_Dend_Clusters_Oral.png", final, width = 5, height = 6.5, dpi = 300)
library(ggdendro)
dist_mat <- slot(Wang_clusters_wardD2_Aboral, "clusters_dist")[["BMA"]]
hc <- hclust(dist_mat, method = "ward.D2")
dend <- as.dendrogram(hc)

dend_data <- ggdendro::dendro_data(dend, type = "rectangle")

dend_labels <- dend_data$labels %>%
    mutate(
    cluster = str_extract(label, "\\d+"),
    Cluster.name = str_replace(label,".*?_.*?_",""),
    Cluster.term = str_replace(label,"^\\d+_",""),
    Cluster.term = str_replace(Cluster.term,"_.*$","")
  ) %>%
  mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) 

bar_plot <- clustered_allDEGs_enrichedGO %>%  
filter(Tissue == "Aboral") %>%
  arrange(as.numeric(GO.cluster)) %>%
  mutate(Cluster_label = factor(Cluster.name, levels = rev(dend_labels$Cluster.name))) %>%
  ggplot(aes(x = Cluster_label)) +
  stat_count(width = .8, fill="#756BB1", alpha = 0.8) +
  coord_flip() +
  theme_classic(base_size = 9) +
  scale_y_continuous(
    breaks = scales::breaks_width(2),
    expand = c(0, 0),
    limits = c(0, NA)
  ) + 
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
    axis.title.y = element_blank(),
    axis.text.y = element_text(size = 7, lineheight = 0.7,color = "black"),
    strip.text = element_text(face = "bold", size = 9),
    strip.background = element_rect(fill = "white", color = "black"),
    legend.position = "none",
    plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
    panel.spacing.y = unit(0.2, "lines")
  ) + labs(subtitle = "Aboral", y = "Terms in Cluster")

dend_plot <- ggplot() +
  geom_segment(
    data = dend_data$segments,
    aes(x = y, y = x, xend = yend, yend = xend),
    linewidth = 0.6,
    color = "#2C3E50"
  ) +
  scale_y_reverse(
    breaks = seq_along(dend_labels$Cluster.name),
    expand = c(0.02, 0.02)
  ) +
  scale_x_reverse(expand = c(0.02, 0)) +
  theme_void() +
  theme(
    plot.margin = margin(t = 30, r = 0, b = 22, l = 5)
  )

# Combine plots with better proportions
final <- dend_plot + bar_plot +
  plot_layout(widths = c(0.8, 3.5))

Wang_clusters_wardD2_Oral@heatmap$GOclusters_static

ggsave("../output_RNA/differential_expression/semantic-enrichment/BP_Dend_Clusters_Aboral.png", final, width = 5, height = 6.5, dpi = 300)

bar plots

p1 <- clustered_allDEGs_enrichedGO %>%  
filter(Tissue == "Aboral") %>%
  arrange(as.numeric(GO.cluster)) %>%
  mutate(Cluster_label = factor(Cluster.name, levels = rev(unique(Cluster.name)))) %>%
  ggplot(aes(x = Cluster_label)) +
  stat_count(width = .8, fill="#756BB1", alpha = 0.8) +
  coord_flip() +
  scale_y_continuous(breaks = scales::breaks_width(2)) +
  theme_bw(base_size = 9) +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
    axis.title.y = element_blank(),
    axis.text.y = element_text(size = 7, lineheight = 0.7,color = "black"),
    strip.text = element_text(face = "bold", size = 9),
    strip.background = element_rect(fill = "white", color = "black"),
    legend.position = "none",
    plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
    panel.spacing.y = unit(0.2, "lines")
  ) + labs(subtitle = "Aboral", y = "Terms in Cluster")

p2 <- clustered_allDEGs_enrichedGO %>%  
filter(Tissue == "Oral") %>%
  arrange(as.numeric(GO.cluster)) %>%
  mutate(Cluster_label = factor(Cluster.name, levels = rev(unique(Cluster.name)))) %>%
  ggplot(aes(x = Cluster_label)) +
  stat_count(width = .8, fill="#FD8D3C", alpha = 0.8) +
  coord_flip() +
  theme_bw(base_size = 9) +
  scale_y_continuous(breaks = scales::breaks_width(2)) +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
    axis.title.y = element_blank(),
    axis.text.y = element_text(size = 7, lineheight = 0.7,color = "black"),
    strip.text = element_text(face = "bold", size = 9),
    strip.background = element_rect(fill = "white", color = "black"),
    legend.position = "none",
    plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
    panel.spacing.y = unit(0.2, "lines")
  ) + labs(subtitle = "Oral epidermis", y = "Terms in Cluster")

plot <- p1 + p2 +
  plot_annotation(
    title = "Semantic Similarity Clusters of All Enriched (p < 0.01) BP GO Terms",
    theme = theme(plot.title = element_text(face = "bold", size = 9, hjust = 0.5))
  )

ggsave("../output_RNA/differential_expression/semantic-enrichment/BP_Clusters.png", plot, width = 7, height = 6.5, dpi = 300)

NEW: Plotting of both tissues, pre-clustered by tissue

top_Oral_per_Cluster <- clustered_Oral_DEGs_enrichedGO %>% group_by(GO.cluster) %>% arrange(Oral_elim.pvalue) %>% slice(1) %>% ungroup() %>% dplyr::select(GO.cluster,GO.ID,term) %>% rename("top_sig_GO" = `GO.ID`,"top_sig_term" = `term`)

plotting_oral <- left_join(clustered_Oral_DEGs_enrichedGO,top_Oral_per_Cluster) %>%
  arrange(as.numeric(GO.cluster)) %>%
                        mutate(is_top = GO.ID == top_sig_GO,
                               pvalue = Oral_elim.pvalue,
                                term_wrapped = str_wrap(`term`, width = 25),
                                 cluster_label = factor(str_wrap(Cluster.name,
                                        width = 20), levels = unique(str_wrap(Cluster.name,
                                        width = 20)))) %>%
  add_count(GO.cluster, name = "n_terms")
## Joining with `by = join_by(GO.cluster)`
top_Aboral_per_Cluster <- clustered_Aboral_DEGs_enrichedGO %>% group_by(GO.cluster) %>% arrange(Aboral_elim.pvalue) %>% slice(1) %>% ungroup() %>% dplyr::select(GO.cluster,GO.ID,term) %>% rename("top_sig_GO" = `GO.ID`,"top_sig_term" = `term`)

plotting_aboral <- left_join(clustered_Aboral_DEGs_enrichedGO,top_Aboral_per_Cluster)%>%
                      arrange(as.numeric(GO.cluster)) %>%
                        mutate(is_top = GO.ID == top_sig_GO,
                               pvalue = Aboral_elim.pvalue,
                                term_wrapped = str_wrap(`term`, width = 25),
                                 cluster_label = factor(str_wrap(Cluster.name,
                                        width = 20), levels = unique(str_wrap(Cluster.name,
                                        width = 20)))) %>%
  add_count(GO.cluster, name = "n_terms")
## Joining with `by = join_by(GO.cluster)`
combined_plotting <- rbind(plotting_oral %>%
                                        dplyr::select(-contains("Oral")),
                                      plotting_aboral %>%
                                        dplyr::select(-contains("Aboral")))
library(ggrepel)
p_oral <- ggplot(plotting_oral, 
         aes(x = `Oral_elim.-log10_pvalue`, 
             y = reorder(GO.ID, `Oral_elim.-log10_pvalue`),
             color = factor(GO.cluster),
             size = `Oral_elim.-log10_pvalue`)) +
    geom_segment(aes(xend = 0, yend = reorder(GO.ID, `Oral_elim.-log10_pvalue`)),
                 alpha = 0.5, linewidth = 0.8) +
    geom_point(alpha = 0.8) +
    facet_grid(cluster_label ~ ., 
               scales = "free_y", space = "free_y") +
    geom_text_repel(data = plotting_oral %>% filter(is_top),
                  aes(label = term_wrapped),
                  size = 2.5,
                  hjust = 0,
                  nudge_x = 0.3,
                  direction = "y",
                  color = "black",
                  fontface = "bold",
                  segment.color = "gray40",
                  segment.size = 0.3,
                  box.padding = 0.3,
                  max.overlaps = 30) +
    labs(title = "Oral Epidermis Tissue",
         x = "-log10(p-value)",
         y = NULL) +
    theme_minimal(base_size = 10) +
    theme(plot.title = element_text(face = "bold", size = 12),
          axis.text.y = element_text(size = 7.5),
          strip.text.y = element_text(size = 8.5, angle = 0, hjust = 0, face = "bold"),
          legend.position = "none",
          panel.spacing = unit(0.5, "lines"),
          panel.grid.major.y = element_blank(),
          panel.grid.minor = element_blank()) +
    scale_size_continuous(range = c(2, 5))

p_aboral <- ggplot(plotting_aboral, 
         aes(x = `Aboral_elim.-log10_pvalue`, 
             y = reorder(GO.ID, `Aboral_elim.-log10_pvalue`),
             color = factor(GO.cluster),
             size = `Aboral_elim.-log10_pvalue`)) +
    geom_segment(aes(xend = 0, yend = reorder(GO.ID, `Aboral_elim.-log10_pvalue`)),
                 alpha = 0.5, linewidth = 0.8) +
    geom_point(alpha = 0.8) +
    facet_grid(cluster_label ~ ., 
               scales = "free_y", space = "free_y") +
  geom_text_repel(data = plotting_aboral %>% filter(is_top),
                  aes(label = term_wrapped),
                  size = 2.5,
                  hjust = 0,
                  nudge_x = 0.3,
                  direction = "y",
                  color = "black",
                  fontface = "bold",
                  segment.color = "gray40",
                  segment.size = 0.3,
                  box.padding = 0.3,
                  max.overlaps = 30) + 
  labs(title = "Aboral Tissue",
         x = "-log10(p-value)",
         y = NULL) +
    theme_minimal(base_size = 10) +
    theme(plot.title = element_text(face = "bold", size = 12),
          axis.text.y = element_text(size = 7.5),
          strip.text.y = element_text(size = 8.5, angle = 0, hjust = 0, face = "bold"),
          legend.position = "none",
          panel.spacing = unit(0.5, "lines"),
          panel.grid.major.y = element_blank(),
          panel.grid.minor = element_blank()) +
    scale_size_continuous(range = c(2, 5))
library(patchwork)
p2 <- p_oral + p_aboral +
  plot_annotation(
    title = "Enriched GO Terms by Semantic Cluster",
    subtitle = "Diamond = most significant term per cluster | Size = significance",
    theme = theme(plot.title = element_text(face = "bold", size = 14))
  )


ggsave("../output_RNA/differential_expression/semantic-enrichment/All_sig_BP_terms_by_cluster.pdf", p2, width = 14, height = 16)
p_oral <-ggplot(plotting_oral, 
       aes(x = `Oral_elim.-log10_pvalue`, 
           y = reorder(GO.ID, `Oral_elim.-log10_pvalue`),
           size = `Oral_elim.-log10_pvalue`)) +
  geom_segment(aes(xend = 0, yend = reorder(GO.ID, `Oral_elim.-log10_pvalue`)),
               alpha = 0.4, linewidth = 0.6, color = "#FD8D3C") +
  geom_point(alpha = 0.7, color = "#FD8D3C") +
    facet_grid(cluster_label ~ ., 
               scales = "free_y", space = "free_y") +
    geom_text_repel(data = plotting_oral %>% filter(is_top),
                  aes(label = str_wrap(term, width = 20)),
                  size = 2,
                  hjust = 0,
                  nudge_x = 0.5,
                  nudge_y = -0.5,
                  direction = "y",
                  color = "black",
                  segment.color = "gray50",
                  segment.size = 0.5,
                  max.overlaps = 30) +
    labs(title = "Oral Epidermis Tissue",
         x = "-log10(p-value)",
         y = NULL) +
    theme_minimal(base_size = 11) +
    theme(plot.title = element_text(face = "bold", size = 12),
          axis.text.y = element_text(size = 7.5),
          strip.text.y = element_text(size = 7.5, angle = 0, hjust = 0, face = "bold"),
          legend.position = "none",
          panel.spacing = unit(.15, "lines"),
          panel.grid.major.y = element_blank(),
          panel.grid.minor = element_blank()) +
    scale_size_continuous(range = c(2, 5))

p_aboral <- ggplot(plotting_aboral, 
         aes(x = `Aboral_elim.-log10_pvalue`, 
             y = reorder(GO.ID, `Aboral_elim.-log10_pvalue`),
             color = factor(GO.cluster),
             size = `Aboral_elim.-log10_pvalue`)) +
    geom_segment(aes(xend = 0, yend = reorder(GO.ID, `Aboral_elim.-log10_pvalue`)),
               alpha = 0.4, linewidth = 0.6, color = "#756BB1") +
  geom_point(alpha = 0.7, color = "#756BB1") +
      facet_grid(cluster_label ~ ., 
               scales = "free_y", space = "free_y") +
  geom_text_repel(data = plotting_aboral %>% filter(is_top),
                  aes(label = str_wrap(term, width = 20)),
                  size = 2,
                  hjust = 0,
                  nudge_x = 0.5,
                  nudge_y = -0.5,
                  direction = "y",
                  color = "black",
                  segment.color = "gray50",
                  segment.size = 0.5,
                  max.overlaps = 30) +
  labs(title = "Aboral Tissue",
         x = "-log10(p-value)",
         y = NULL) +
   theme_minimal(base_size = 11) +
    theme(plot.title = element_text(face = "bold", size = 12),
          axis.text.y = element_text(size = 7.5),
          strip.text.y = element_text(size = 7.5, angle = 0, hjust = 0, face = "bold"),
          legend.position = "none",
          panel.spacing = unit(.15, "lines"),
          panel.grid.major.y = element_blank(),
          panel.grid.minor = element_blank()) +
    scale_size_continuous(range = c(2, 5))

p2 <- p_oral + p_aboral +
  plot_annotation(
    title = "Tissue-specific GO term enrichment patterns",
    theme = theme(plot.title = element_text(face = "bold", size = 14))
  )


ggsave("../output_RNA/differential_expression/semantic-enrichment/All_sig_BP_terms_by_cluster_colored.pdf", p2, width = 14, height = 16)
ggsave("../output_RNA/differential_expression/semantic-enrichment/All_sig_BP_terms_by_cluster_colored2.png", p2, width = 8, height = 12, dpi = 300)

Plot

library(dplyr)
library(tidyr)
library(ggplot2)

# Create bidirectional data
cluster_data <- clustered_allDEGs_enrichedGO %>%
  dplyr::select(Cluster.name, Tissue) %>%
  group_by(Cluster.name) %>%
  summarise(
    Oral_terms = sum(Tissue=="Oral"),
    Aboral_terms = sum(Tissue=="Aboral"),
    .groups = "drop"
  ) %>%
  mutate(
    deviation = Aboral_terms - Oral_terms,
    Oral_terms = -Oral_terms  # Make negative for bidirectional plot
  ) %>%
  pivot_longer(cols = c("Oral_terms", "Aboral_terms"), 
               names_to = "Tissue", values_to = "n_terms") %>%
  mutate(Tissue = ifelse(Tissue == "Oral_terms", "Oral epidermis", "Aboral"))

# Calculate transition points for dashed lines
transition_data <- cluster_data %>%
  distinct(Cluster.name, deviation) %>%
  arrange(deviation)

oral_to_equal <- sum(transition_data$deviation >= 0) + 0.5
equal_to_aboral <- sum(transition_data$deviation > 0) + 0.5

# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
  geom_col(width = 0.7, alpha = 0.8) +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
  geom_vline(xintercept = c(oral_to_equal, equal_to_aboral), 
             color = "grey60", linetype = "dashed", linewidth = 0.6) +
  scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) + #green: "#2E8B57"
  coord_flip() +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12)
  ) +
  scale_y_continuous(labels = abs) +
  labs(
    x = "Gene clusters",
    y = "Number of enriched GO terms",
    title = "Tissue-specific GO term enrichment patterns",
    subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
  )

ggsave("../output_RNA/differential_expression/semantic-enrichment/GO_enrichment_bidirectional_clustered_separate.png", p, width = 10, height = 8, dpi = 300)

Both tissues together:

create topGO objects and perform enrichment using topGO wrapped by ViSEAGO

BP_Results <- ViSEAGO::merge_enrich_terms(
    Input = list(Oral_elim = c("BP_Oral", "elim_Oral"),
                 Aboral_elim = c("BP_Aboral", "elim_Aboral"))
)
## 'select()' returned 1:1 mapping between keys and columns

Visualize and save initial results

# display the merged table
BP_Results
## - object class: enrich_GO_terms
## - ontology: BP
## - method: topGO
## - summary:
##  Oral_elim
##       BP_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 826
##         feasible_genes: 9496
##         feasible_genes_significant: 712
##         genes_nodeSize: 5
##         nodes_number: 6675
##         edges_number: 14047
##       elim_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 6675
##         GO_significant: 67
##         feasible_genes: 9496
##         feasible_genes_significant: 712
##         genes_nodeSize: 5
##         Nontrivial_nodes: 4469 
##  Aboral_elim
##       BP_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 376
##         feasible_genes: 9496
##         feasible_genes_significant: 332
##         genes_nodeSize: 5
##         nodes_number: 6675
##         edges_number: 14047
##       elim_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 6675
##         GO_significant: 118
##         feasible_genes: 9496
##         feasible_genes_significant: 332
##         genes_nodeSize: 5
##         Nontrivial_nodes: 3092 
##  - enrichment pvalue cutoff:
##         Oral_elim : 0.01
##         Aboral_elim : 0.01
## - enrich GOs (in at least one list): 185 GO terms of 2 conditions.
##         Oral_elim : 67 terms
##         Aboral_elim : 118 terms
ViSEAGO::show_table(BP_Results)
# print the merged table in a file
ViSEAGO::show_table(
    BP_Results,
    "../output_RNA/differential_expression/semantic-enrichment/DE_05.tsv"
)
  • enrichment pvalue cutoff: Oral_elim : 0.01 Aboral_elim : 0.01
  • enrich GOs (in at least one list): 185 GO terms of 2 conditions. Oral_elim : 67 terms Aboral_elim : 118 terms

Semantic similarity –> Cluster: Use Wang graph-based method to identify similarity between GO terms

# initialize 
myGOs<-ViSEAGO::build_GO_SS(
    gene2GO=myGENE2GO_Pacuta,
    enrich_GO_terms=BP_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
    myGOs,
   distance=c("Wang")
)

myGOs
## - object class: GO_SS
## - ontology: BP
## - method: topGO
## - summary:
## Oral_elim
##       BP_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 826
##         feasible_genes: 9496
##         feasible_genes_significant: 712
##         genes_nodeSize: 5
##         nodes_number: 6675
##         edges_number: 14047
##       elim_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 6675
##         GO_significant: 67
##         feasible_genes: 9496
##         feasible_genes_significant: 712
##         genes_nodeSize: 5
##         Nontrivial_nodes: 4469 
##  Aboral_elim
##       BP_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 376
##         feasible_genes: 9496
##         feasible_genes_significant: 332
##         genes_nodeSize: 5
##         nodes_number: 6675
##         edges_number: 14047
##       elim_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 6675
##         GO_significant: 118
##         feasible_genes: 9496
##         feasible_genes_significant: 332
##         genes_nodeSize: 5
##         Nontrivial_nodes: 3092 
##  - enrichment pvalue cutoff:
##         Oral_elim : 0.01
##         Aboral_elim : 0.01
## - enrich GOs (in at least one list): 185 GO terms of 2 conditions.
##         Oral_elim : 67 terms
##         Aboral_elim : 118 terms
## - terms distances:  Wang

Multi Dimensional Scaling Plot of the GO-terms based on semantic similarity

# display MDSplot
ViSEAGO::MDSplot(myGOs,
                 "GOterms")

Heatmap + table of GO terms, with clusters aggregated by hclust with ward.D2 method

# GOterms heatmap with the default parameters
Wang_clusters_wardD2 <- ViSEAGO::GOterms_heatmap(
  myGOs,
  showIC = TRUE,
  showGOlabels = TRUE,
  GO.tree = list(
    tree = list(distance = "Wang", aggreg.method = "ward.D2"),
    cut = list(
      dynamic = list(
        pamStage = TRUE,
        pamRespectsDendro = TRUE,
        deepSplit = 2,
        minClusterSize = 4
      )
    )
  ),
  samples.tree = NULL
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2,
    "GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2)
# Print the clusters-heatmap table
ViSEAGO::show_table(
    Wang_clusters_wardD2,
    "../output_RNA/differential_expression/semantic-enrichment/DE_05_cluster_heatmap_Wang_wardD2.tsv"
)

Multi Dimensional Scaling Plot of the enriched GO-terms based on semantic similarity, colored by cluster

# display colored MDSplot
ViSEAGO::MDSplot(
    Wang_clusters_wardD2,
    "GOterms")

Further summarizing the Semantic Similarity-based clusters

# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2<-ViSEAGO::compute_SS_distances(
    Wang_clusters_wardD2,
    distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned many:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
    Wang_clusters_wardD2,
    "GOclusters")
# GOclusters heatmap
Wang_clusters_wardD2<-ViSEAGO::GOclusters_heatmap(
    Wang_clusters_wardD2,
    tree=list(
        distance="BMA",
        aggreg.method="ward.D2"
    )
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2,
    "GOclusters")

Custom plotting of results

clustered_allDEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2@enrich_GOs@data)

cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>% 
  mutate(
    cluster = str_extract(cluster_full, "\\d+"),
    Cluster.name = str_replace(cluster_full,".*?_.*?_","")
  ) %>%
  mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) %>%
  dplyr::select(cluster, Cluster.name)

cluster_map
##    cluster                                              Cluster.name
## 1        1                  Cluster 1: neuron projection development
## 2        2                           Cluster 2: cell differentiation
## 3        3               Cluster 3: anatomical structure development
## 4        4               Cluster 4: anatomical structure development
## 5        5             Cluster 5: multicellular organism development
## 6        6                      Cluster 6: sensory organ development
## 7        7               Cluster 7: anatomical structure development
## 8        8                                      Cluster 8: transport
## 9        9                                      Cluster 9: transport
## 10      10 Cluster 10: external encapsulating structure organization
## 11      11                            Cluster 11: biological_process
## 12      12                   Cluster 12: regulation of cell adhesion
## 13      13                                 Cluster 13: cell adhesion
## 14      14                             Cluster 14: secretion by cell
## 15      15                           Cluster 15: cell-cell signaling
## 16      16              Cluster 16: regulation of biological process
## 17      17                             Cluster 17: metabolic process
## 18      18                             Cluster 18: metabolic process
## 19      19              Cluster 19: multicellular organismal process
## 20      20                        Cluster 20: nervous system process
## 21      21                          Cluster 21: response to stimulus
## 22      22                       Cluster 22: immune effector process
## 23      23                            Cluster 23: biological_process

Manually fix cluster names based on GO terms inside each cluster

cluster_map_fixed <- cluster_map #%>%
  # mutate(Cluster.name = str_replace(Cluster.name, "Cluster 2: developmental process", "Cluster 2: cell fate determination"),
  #        Cluster.name = str_replace(Cluster.name, "Cluster 4: anatomical structure development", "Cluster 4: regeneration & tissue morphogenesis"),
  #        Cluster.name = str_replace(Cluster.name, "Cluster 5: multicellular organism development", "Cluster 5: multicellular organism pattern formation"),
  #        Cluster.name = str_replace(Cluster.name, "Cluster 9: transport", "Cluster 9: metal ion transmembrane transport"),
  #        Cluster.name = str_replace(Cluster.name, "Cluster 10: transport", "Cluster 10: amino acid & small molecule transmembrane transport"),
  #       Cluster.name = str_replace(Cluster.name, "Cluster 12: nervous system process", "Cluster 12: sensory perception"),
  #       Cluster.name = str_replace(Cluster.name, "Cluster 16: biological_process", "Cluster 16: regeneration & wound healing"),
  #       Cluster.name = str_replace(Cluster.name, "Cluster 18: secretion by cell", "Cluster 18: neurotransmitter and hormone secretion"),
  #       Cluster.name = str_replace(Cluster.name, "Cluster 24: external encapsulating structure organization", "Cluster 24: extracellular matrix organization"),
  #       Cluster.name = str_replace(Cluster.name, "Cluster 25: biological_process", "Cluster 25: cell organization and homeostasis")
  #        )
clustered_allDEGs_enrichedGO <- clustered_allDEGs_enrichedGO %>%
  left_join(cluster_map_fixed, by = c("GO.cluster" = "cluster"))

write.csv(clustered_allDEGs_enrichedGO,"../output_RNA/differential_expression/semantic-enrichment/DE_05_clusters_named.csv")

library(DT)

# Interactive table with scrolling
datatable(clustered_allDEGs_enrichedGO,
          options = list(
            pageLength = 10,   # rows per page
            scrollX = TRUE,    # horizontal scroll if wide
            scrollY = "400px"  # vertical scroll
          ))

Plot

library(dplyr)
library(tidyr)
library(ggplot2)

# Create bidirectional data
cluster_data <- clustered_allDEGs_enrichedGO %>%
  dplyr::select(Cluster.name, Oral_elim.Significant_genes, Aboral_elim.Significant_genes) %>%
  mutate(
    # Clean cluster names
    #Cluster.name = gsub("^Cluster \\d+:\\s*", "", Cluster.name),
    Oral_terms = ifelse(!is.na(Oral_elim.Significant_genes), 1, 0),
    Aboral_terms = ifelse(!is.na(Aboral_elim.Significant_genes), 1, 0)
  ) %>%
  group_by(Cluster.name) %>%
  summarise(
    Oral_terms = sum(Oral_terms),
    Aboral_terms = sum(Aboral_terms),
    .groups = "drop"
  ) %>%
  filter(Oral_terms + Aboral_terms > 0) %>%
  mutate(
    deviation = Aboral_terms - Oral_terms,
    Oral_terms = -Oral_terms  # Make negative for bidirectional plot
  ) %>%
  pivot_longer(cols = c("Oral_terms", "Aboral_terms"), 
               names_to = "Tissue", values_to = "n_terms") %>%
  mutate(Tissue = ifelse(Tissue == "Oral_terms", "Oral epidermis", "Aboral"))

datatable(cluster_data,
          options = list(
            pageLength = 10,   # rows per page
            scrollX = TRUE,    # horizontal scroll if wide
            scrollY = "400px"  # vertical scroll
          ))
# Calculate transition points for dashed lines
transition_data <- cluster_data %>%
  distinct(Cluster.name, deviation) %>%
  arrange(deviation)

oral_to_equal <- sum(transition_data$deviation >= 0) + 0.5
equal_to_aboral <- sum(transition_data$deviation > 0) + 0.5

# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
  geom_col(width = 0.7, alpha = 0.8) +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
  geom_vline(xintercept = c(oral_to_equal, equal_to_aboral), 
             color = "grey60", linetype = "dashed", linewidth = 0.6) +
  scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) + #green: "#2E8B57"
  coord_flip() +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12)
  ) +
  scale_y_continuous(labels = abs) +
  labs(
    x = "Gene clusters",
    y = "Number of enriched GO terms",
    title = "Tissue-specific GO term enrichment patterns",
    subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
  )

ggsave("../output_RNA/differential_expression/semantic-enrichment/GO_enrichment_bidirectional.png", p, width = 10, height = 8, dpi = 300)

print(p)

# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
  geom_col(width = 0.7, alpha = 0.8) +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
  geom_vline(xintercept = c(oral_to_equal, equal_to_aboral), 
             color = "grey60", linetype = "dashed", linewidth = 0.6) +
  scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) +  #green: "#2E8B57"
  coord_flip() +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12, hjust=0)
  ) +
  scale_y_continuous(labels = abs) +
  scale_x_discrete(position = "top") +
  labs(
    x = "Gene clusters",
    y = "Number of enriched GO terms",
    title = "Tissue-specific GO term enrichment patterns",
    subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
  )

ggsave("../output_RNA/differential_expression/semantic-enrichment/GO_enrichment_bidirectional_rightaxis.png", p, width = 10, height = 8, dpi = 300)

print(p)

# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
  geom_col(width = 0.7, alpha = 0.8) +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
  geom_vline(xintercept = c(oral_to_equal, equal_to_aboral), 
             color = "grey60", linetype = "dashed", linewidth = 0.6) +
  scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) + #green: "#2E8B57"
  #coord_flip() +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "right",
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(size = 12,angle = 65, hjust = 1),
    axis.text.y = element_text(size = 12)
  ) +
  scale_y_continuous(labels = abs) +
  labs(
    x = "Gene clusters",
    y = "Number of enriched GO terms",
    title = "Tissue-specific GO term enrichment patterns",
    subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
  )

p

ggsave("../output_RNA/differential_expression/semantic-enrichment/GO_enrichment_bidirectional_horiz.png", p, width = 12, height = 8, dpi = 300)

CC

Oral Epidermis:

create topGO objects and perform enrichment using topGO wrapped by ViSEAGO

# create viseago object
selection <- names(selection_Oral)[selection_Oral==1]
background <- names(expressed)

CC_Oral <- ViSEAGO::create_topGOdata(
    geneSel=selection,
    allGenes=background,
    gene2GO=myGENE2GO_Pacuta, 
    ont="CC",
    nodeSize=5
)
## 
## Building most specific GOs .....
##  ( 1542 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 1765 GO terms and 2911 relations. )
## 
## Annotating nodes ...............
##  ( 10052 genes annotated to the GO terms. )
# perform TopGO test using elim algorithm
elim_Oral <- topGO::runTest(
    CC_Oral,
    algorithm ="elim",
    statistic = "fisher"
)
## 
##           -- Elim Algorithm -- 
## 
##       the algorithm is scoring 611 nontrivial nodes
##       parameters: 
##           test statistic: fisher
##           cutOff: 0.01
## 
##   Level 12:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 11:  21 nodes to be scored   (0 eliminated genes)
## 
##   Level 10:  60 nodes to be scored   (0 eliminated genes)
## 
##   Level 9:   85 nodes to be scored   (24 eliminated genes)
## 
##   Level 8:   95 nodes to be scored   (30 eliminated genes)
## 
##   Level 7:   97 nodes to be scored   (61 eliminated genes)
## 
##   Level 6:   96 nodes to be scored   (342 eliminated genes)
## 
##   Level 5:   67 nodes to be scored   (601 eliminated genes)
## 
##   Level 4:   43 nodes to be scored   (601 eliminated genes)
## 
##   Level 3:   43 nodes to be scored   (2500 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (3424 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (3424 eliminated genes)
CC_Results <- ViSEAGO::merge_enrich_terms(
    Input = list( Oral_elim = c("CC_Oral", "elim_Oral")))
## 'select()' returned 1:1 mapping between keys and columns

Visualize and save initial results

# display the merged table
CC_Results
## - object class: enrich_GO_terms
## - ontology: CC
## - method: topGO
## - summary:
##  Oral_elim
##       CC_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 826
##         feasible_genes: 10052
##         feasible_genes_significant: 762
##         genes_nodeSize: 5
##         nodes_number: 967
##         edges_number: 1615
##       elim_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 967
##         GO_significant: 17
##         feasible_genes: 10052
##         feasible_genes_significant: 762
##         genes_nodeSize: 5
##         Nontrivial_nodes: 611 
##  - enrichment pvalue cutoff:
##         Oral_elim : 0.01
## - enrich GOs (in at least one list): 17 GO terms of 1 conditions.
##         Oral_elim : 17 terms
ViSEAGO::show_table(CC_Results)
# print the merged table in a file
ViSEAGO::show_table(
    CC_Results,
    "../output_RNA/differential_expression/semantic-enrichment/CC_DE_05_Oral.tsv"
)
  • enrichment pvalue cutoff: Oral_elim : 0.01
  • enrich GOs (in at least one list): Oral_elim : 19 terms

Semantic similarity –> Cluster: Use Wang graph-based method to identify similarity between GO terms

# initialize 
myGOs<-ViSEAGO::build_GO_SS(
    gene2GO=myGENE2GO_Pacuta,
    enrich_GO_terms=CC_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
    myGOs,
   distance=c("Wang")
)

myGOs
## - object class: GO_SS
## - ontology: CC
## - method: topGO
## - summary:
## Oral_elim
##       CC_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 826
##         feasible_genes: 10052
##         feasible_genes_significant: 762
##         genes_nodeSize: 5
##         nodes_number: 967
##         edges_number: 1615
##       elim_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 967
##         GO_significant: 17
##         feasible_genes: 10052
##         feasible_genes_significant: 762
##         genes_nodeSize: 5
##         Nontrivial_nodes: 611 
##  - enrichment pvalue cutoff:
##         Oral_elim : 0.01
## - enrich GOs (in at least one list): 17 GO terms of 1 conditions.
##         Oral_elim : 17 terms
## - terms distances:  Wang

Multi Dimensional Scaling Plot of the GO-terms based on semantic similarity

# display MDSplot
ViSEAGO::MDSplot(myGOs,
                 "GOterms")

Heatmap + table of GO terms, with clusters aggregated by hclust with ward.D2 method

# GOterms heatmap with the default parameters
Wang_clusters_wardD2_Oral <- ViSEAGO::GOterms_heatmap(
  myGOs,
  showIC = TRUE,
  showGOlabels = TRUE,
  GO.tree = list(
    tree = list(distance = "Wang", aggreg.method = "ward.D2"),
    cut = list(
      dynamic = list(
        pamStage = TRUE,
        pamRespectsDendro = TRUE,
        deepSplit = 2,
        minClusterSize = 2
      )
    )
  ),
  samples.tree = NULL
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2_Oral,
    "GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2_Oral)
# Print the clusters-heatmap table
ViSEAGO::show_table(
    Wang_clusters_wardD2_Oral,
    "../output_RNA/differential_expression/semantic-enrichment/CC_DE_05_Oral_cluster_heatmap_Wang_wardD2.tsv"
)

Multi Dimensional Scaling Plot of the enriched GO-terms based on semantic similarity, colored by cluster

# display colored MDSplot
ViSEAGO::MDSplot(
    Wang_clusters_wardD2_Oral,
    "GOterms")

Further summarizing the Semantic Similarity-based clusters

# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2_Oral<-ViSEAGO::compute_SS_distances(
    Wang_clusters_wardD2_Oral,
    distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned 1:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
    Wang_clusters_wardD2_Oral,
    "GOclusters")
# GOclusters heatmap
Wang_clusters_wardD2_Oral <-ViSEAGO::GOclusters_heatmap(
    Wang_clusters_wardD2_Oral,
    tree=list(
        distance="BMA",
        aggreg.method="ward.D2"
    )
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2_Oral,
    "GOclusters")

Aboral Epidermis:

create topGO objects and perform enrichment using topGO wrapped by ViSEAGO

# create viseago object
selection <- names(selection_Aboral)[selection_Aboral==1]
background <- names(expressed)

CC_Aboral <- ViSEAGO::create_topGOdata(
    geneSel=selection,
    allGenes=background,
    gene2GO=myGENE2GO_Pacuta, 
    ont="CC",
    nodeSize=5
)
## 
## Building most specific GOs .....
##  ( 1542 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 1765 GO terms and 2911 relations. )
## 
## Annotating nodes ...............
##  ( 10052 genes annotated to the GO terms. )
# perform TopGO test using elim algorithm
elim_Aboral <- topGO::runTest(
    CC_Aboral,
    algorithm ="elim",
    statistic = "fisher"
)
## 
##           -- Elim Algorithm -- 
## 
##       the algorithm is scoring 351 nontrivial nodes
##       parameters: 
##           test statistic: fisher
##           cutOff: 0.01
## 
##   Level 11:  6 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  26 nodes to be scored   (0 eliminated genes)
## 
##   Level 9:   40 nodes to be scored   (0 eliminated genes)
## 
##   Level 8:   51 nodes to be scored   (7 eliminated genes)
## 
##   Level 7:   57 nodes to be scored   (19 eliminated genes)
## 
##   Level 6:   58 nodes to be scored   (431 eliminated genes)
## 
##   Level 5:   49 nodes to be scored   (850 eliminated genes)
## 
##   Level 4:   29 nodes to be scored   (1101 eliminated genes)
## 
##   Level 3:   32 nodes to be scored   (3129 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (3481 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (3481 eliminated genes)
CC_Results <- ViSEAGO::merge_enrich_terms(
    Input = list(Aboral_elim = c("CC_Aboral", "elim_Aboral"))
)
## 'select()' returned 1:1 mapping between keys and columns

Visualize and save initial results

# display the merged table
CC_Results
## - object class: enrich_GO_terms
## - ontology: CC
## - method: topGO
## - summary:
##  Aboral_elim
##       CC_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 376
##         feasible_genes: 10052
##         feasible_genes_significant: 364
##         genes_nodeSize: 5
##         nodes_number: 967
##         edges_number: 1615
##       elim_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 967
##         GO_significant: 23
##         feasible_genes: 10052
##         feasible_genes_significant: 364
##         genes_nodeSize: 5
##         Nontrivial_nodes: 351 
##  - enrichment pvalue cutoff:
##         Aboral_elim : 0.01
## - enrich GOs (in at least one list): 23 GO terms of 1 conditions.
##         Aboral_elim : 23 terms
ViSEAGO::show_table(CC_Results)
# print the merged table in a file
ViSEAGO::show_table(
    CC_Results,
    "../output_RNA/differential_expression/semantic-enrichment/CC_DE_05_Aboral.tsv"
)
  • enrichment pvalue cutoff: Aboral_elim : 0.01
  • enrich GOs (in at least one list): Aboral_elim : 23 terms

Semantic similarity –> Cluster: Use Wang graph-based method to identify similarity between GO terms

# initialize 
myGOs<-ViSEAGO::build_GO_SS(
    gene2GO=myGENE2GO_Pacuta,
    enrich_GO_terms=CC_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
    myGOs,
   distance=c("Wang")
)

myGOs
## - object class: GO_SS
## - ontology: CC
## - method: topGO
## - summary:
## Aboral_elim
##       CC_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 376
##         feasible_genes: 10052
##         feasible_genes_significant: 364
##         genes_nodeSize: 5
##         nodes_number: 967
##         edges_number: 1615
##       elim_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 967
##         GO_significant: 23
##         feasible_genes: 10052
##         feasible_genes_significant: 364
##         genes_nodeSize: 5
##         Nontrivial_nodes: 351 
##  - enrichment pvalue cutoff:
##         Aboral_elim : 0.01
## - enrich GOs (in at least one list): 23 GO terms of 1 conditions.
##         Aboral_elim : 23 terms
## - terms distances:  Wang

Multi Dimensional Scaling Plot of the GO-terms based on semantic similarity

# display MDSplot
ViSEAGO::MDSplot(myGOs,
                 "GOterms")

Heatmap + table of GO terms, with clusters aggregated by hclust with ward.D2 method

# GOterms heatmap with the default parameters
Wang_clusters_wardD2_Aboral <- ViSEAGO::GOterms_heatmap(
  myGOs,
  showIC = TRUE,
  showGOlabels = TRUE,
  GO.tree = list(
    tree = list(distance = "Wang", aggreg.method = "ward.D2"),
    cut = list(
      dynamic = list(
        pamStage = TRUE,
        pamRespectsDendro = TRUE,
        deepSplit = 2,
        minClusterSize = 2
      )
    )
  ),
  samples.tree = NULL
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2_Aboral,
    "GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2_Aboral)
# Print the clusters-heatmap table
ViSEAGO::show_table(
    Wang_clusters_wardD2_Aboral,
    "../output_RNA/differential_expression/semantic-enrichment/CC_DE_05_Aboral_cluster_heatmap_Wang_wardD2.tsv"
)

Multi Dimensional Scaling Plot of the enriched GO-terms based on semantic similarity, colored by cluster

# display colored MDSplot
ViSEAGO::MDSplot(
    Wang_clusters_wardD2_Aboral,
    "GOterms")

Further summarizing the Semantic Similarity-based clusters

# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2_Aboral<-ViSEAGO::compute_SS_distances(
    Wang_clusters_wardD2_Aboral,
    distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned many:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
    Wang_clusters_wardD2_Aboral,
    "GOclusters")
# GOclusters heatmap
Wang_clusters_wardD2_Aboral<-ViSEAGO::GOclusters_heatmap(
    Wang_clusters_wardD2_Aboral,
    tree=list(
        distance="BMA",
        aggreg.method="ward.D2"
    )
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2_Aboral,
    "GOclusters")

Plotting of both tissues, pre-clustered by tissue

Custom plotting of results

clustered_Oral_DEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2_Oral@enrich_GOs@data) 

cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2_Oral,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>% 
  mutate(
    cluster = str_extract(cluster_full, "\\d+"),
    Cluster.name = str_replace(cluster_full,".*?_.*?_",""),
    Cluster.term = str_replace(cluster_full,"^\\d+_",""),
    Cluster.term = str_replace(Cluster.term,"_.*$","")
  ) #%>%
  #mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) #%>%
  #dplyr::select(cluster, Cluster.name)

clustered_Oral_DEGs_enrichedGO <- clustered_Oral_DEGs_enrichedGO %>%
  left_join(cluster_map, by = c("GO.cluster" = "cluster")) %>%
  mutate(Tissue="Oral")

clustered_Aboral_DEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2_Aboral@enrich_GOs@data)

cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2_Aboral,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>% 
  mutate(
    cluster = str_extract(cluster_full, "\\d+"),
    Cluster.name = str_replace(cluster_full,".*?_.*?_",""),
    Cluster.term = str_replace(cluster_full,"^\\d+_",""),
    Cluster.term = str_replace(Cluster.term,"_.*$","")
  ) #%>%
  #mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) #%>%
  #dplyr::select(cluster, Cluster.name)
clustered_Aboral_DEGs_enrichedGO <- clustered_Aboral_DEGs_enrichedGO %>%
  left_join(cluster_map, by = c("GO.cluster" = "cluster")) %>%
  mutate(Tissue="Aboral")


clustered_allDEGs_enrichedGO <- rbind(clustered_Oral_DEGs_enrichedGO %>%
                                        dplyr::select(-contains("Oral")),
                                      clustered_Aboral_DEGs_enrichedGO %>%
                                        dplyr::select(-contains("Aboral")))

Plot

library(dplyr)
library(tidyr)
library(ggplot2)

# Create bidirectional data
cluster_data <- clustered_allDEGs_enrichedGO %>%
  dplyr::select(Cluster.name, Tissue) %>%
  group_by(Cluster.name) %>%
  summarise(
    Oral_terms = sum(Tissue=="Oral"),
    Aboral_terms = sum(Tissue=="Aboral"),
    .groups = "drop"
  ) %>%
  mutate(
    deviation = Aboral_terms - Oral_terms,
    Oral_terms = -Oral_terms  # Make negative for bidirectional plot
  ) %>%
  pivot_longer(cols = c("Oral_terms", "Aboral_terms"), 
               names_to = "Tissue", values_to = "n_terms") %>%
  mutate(Tissue = ifelse(Tissue == "Oral_terms", "Oral epidermis", "Aboral"))

# Calculate transition points for dashed lines
transition_data <- cluster_data %>%
  distinct(Cluster.name, deviation) %>%
  arrange(deviation)

oral_to_equal <- sum(transition_data$deviation >= 0) + 0.5
equal_to_aboral <- sum(transition_data$deviation > 0) + 0.5

# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
  geom_col(width = 0.7, alpha = 0.8) +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
  geom_vline(xintercept = c(oral_to_equal, equal_to_aboral), 
             color = "grey60", linetype = "dashed", linewidth = 0.6) +
  scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) + #green: "#2E8B57"
  coord_flip() +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12)
  ) +
  scale_y_continuous(labels = abs) +
  labs(
    x = "Gene clusters",
    y = "Number of enriched GO terms",
    title = "Tissue-specific GO term enrichment patterns",
    subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
  )

ggsave("../output_RNA/differential_expression/semantic-enrichment/CC_GO_enrichment_bidirectional_clustered_separate.png", p, width = 10, height = 8, dpi = 300)

Both tissues together:

create topGO objects and perform enrichment using topGO wrapped by ViSEAGO

CC_Results <- ViSEAGO::merge_enrich_terms(
    Input = list(Oral_elim = c("CC_Oral", "elim_Oral"),
                 Aboral_elim = c("CC_Aboral", "elim_Aboral"))
)
## 'select()' returned 1:1 mapping between keys and columns

Visualize and save initial results

# display the merged table
CC_Results
## - object class: enrich_GO_terms
## - ontology: CC
## - method: topGO
## - summary:
##  Oral_elim
##       CC_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 826
##         feasible_genes: 10052
##         feasible_genes_significant: 762
##         genes_nodeSize: 5
##         nodes_number: 967
##         edges_number: 1615
##       elim_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 967
##         GO_significant: 17
##         feasible_genes: 10052
##         feasible_genes_significant: 762
##         genes_nodeSize: 5
##         Nontrivial_nodes: 611 
##  Aboral_elim
##       CC_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 376
##         feasible_genes: 10052
##         feasible_genes_significant: 364
##         genes_nodeSize: 5
##         nodes_number: 967
##         edges_number: 1615
##       elim_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 967
##         GO_significant: 23
##         feasible_genes: 10052
##         feasible_genes_significant: 364
##         genes_nodeSize: 5
##         Nontrivial_nodes: 351 
##  - enrichment pvalue cutoff:
##         Oral_elim : 0.01
##         Aboral_elim : 0.01
## - enrich GOs (in at least one list): 38 GO terms of 2 conditions.
##         Oral_elim : 17 terms
##         Aboral_elim : 23 terms
ViSEAGO::show_table(CC_Results)
# print the merged table in a file
ViSEAGO::show_table(
    CC_Results,
    "../output_RNA/differential_expression/semantic-enrichment/CC_DE_05.tsv"
)
  • enrichment pvalue cutoff: Oral_elim : 0.01 Aboral_elim : 0.01
  • enrich GOs (in at least one list): 40 GO terms of 2 conditions. Oral_elim : 19 terms Aboral_elim : 23 terms

Semantic similarity –> Cluster: Use Wang graph-based method to identify similarity between GO terms

# initialize 
myGOs<-ViSEAGO::build_GO_SS(
    gene2GO=myGENE2GO_Pacuta,
    enrich_GO_terms=CC_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
    myGOs,
   distance=c("Wang")
)

myGOs
## - object class: GO_SS
## - ontology: CC
## - method: topGO
## - summary:
## Oral_elim
##       CC_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 826
##         feasible_genes: 10052
##         feasible_genes_significant: 762
##         genes_nodeSize: 5
##         nodes_number: 967
##         edges_number: 1615
##       elim_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 967
##         GO_significant: 17
##         feasible_genes: 10052
##         feasible_genes_significant: 762
##         genes_nodeSize: 5
##         Nontrivial_nodes: 611 
##  Aboral_elim
##       CC_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 376
##         feasible_genes: 10052
##         feasible_genes_significant: 364
##         genes_nodeSize: 5
##         nodes_number: 967
##         edges_number: 1615
##       elim_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 967
##         GO_significant: 23
##         feasible_genes: 10052
##         feasible_genes_significant: 364
##         genes_nodeSize: 5
##         Nontrivial_nodes: 351 
##  - enrichment pvalue cutoff:
##         Oral_elim : 0.01
##         Aboral_elim : 0.01
## - enrich GOs (in at least one list): 38 GO terms of 2 conditions.
##         Oral_elim : 17 terms
##         Aboral_elim : 23 terms
## - terms distances:  Wang

Multi Dimensional Scaling Plot of the GO-terms based on semantic similarity

# display MDSplot
ViSEAGO::MDSplot(myGOs,
                 "GOterms")

Heatmap + table of GO terms, with clusters aggregated by hclust with ward.D2 method

# GOterms heatmap with the default parameters
Wang_clusters_wardD2 <- ViSEAGO::GOterms_heatmap(
  myGOs,
  showIC = TRUE,
  showGOlabels = TRUE,
  GO.tree = list(
    tree = list(distance = "Wang", aggreg.method = "ward.D2"),
    cut = list(
      dynamic = list(
        pamStage = TRUE,
        pamRespectsDendro = TRUE,
        deepSplit = 2,
        minClusterSize = 4
      )
    )
  ),
  samples.tree = NULL
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2,
    "GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2)
# Print the clusters-heatmap table
ViSEAGO::show_table(
    Wang_clusters_wardD2,
    "../output_RNA/differential_expression/semantic-enrichment/CC_DE_05_cluster_heatmap_Wang_wardD2.tsv"
)

Multi Dimensional Scaling Plot of the enriched GO-terms based on semantic similarity, colored by cluster

# display colored MDSplot
ViSEAGO::MDSplot(
    Wang_clusters_wardD2,
    "GOterms")

Further summarizing the Semantic Similarity-based clusters

# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2<-ViSEAGO::compute_SS_distances(
    Wang_clusters_wardD2,
    distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned 1:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
    Wang_clusters_wardD2,
    "GOclusters")
# GOclusters heatmap
Wang_clusters_wardD2<-ViSEAGO::GOclusters_heatmap(
    Wang_clusters_wardD2,
    tree=list(
        distance="BMA",
        aggreg.method="ward.D2"
    )
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2,
    "GOclusters")

Custom plotting of results

clustered_allDEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2@enrich_GOs@data)

cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>% 
  mutate(
    cluster = str_extract(cluster_full, "\\d+"),
    Cluster.name = str_replace(cluster_full,".*?_.*?_","")
  ) %>%
  mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) %>%
  select(cluster, Cluster.name)

cluster_map
##   cluster                             Cluster.name
## 1       1          Cluster 1: extracellular matrix
## 2       2 Cluster 2: cellular anatomical structure
## 3       3       Cluster 3: intracellular organelle
## 4       4            Cluster 4: cellular_component
## 5       5                 Cluster 5: cell junction

Manually fix cluster names based on GO terms inside each cluster

cluster_map_fixed <- cluster_map #%>%
  # mutate(Cluster.name = str_replace(Cluster.name, "Cluster 2: developmental process", "Cluster 2: cell fate determination"),
  #        Cluster.name = str_replace(Cluster.name, "Cluster 4: anatomical structure development", "Cluster 4: regeneration & tissue morphogenesis"),
  #        Cluster.name = str_replace(Cluster.name, "Cluster 5: multicellular organism development", "Cluster 5: multicellular organism pattern formation"),
  #        Cluster.name = str_replace(Cluster.name, "Cluster 9: transport", "Cluster 9: metal ion transmembrane transport"),
  #        Cluster.name = str_replace(Cluster.name, "Cluster 10: transport", "Cluster 10: amino acid & small molecule transmembrane transport"),
  #       Cluster.name = str_replace(Cluster.name, "Cluster 12: nervous system process", "Cluster 12: sensory perception"),
  #       Cluster.name = str_replace(Cluster.name, "Cluster 16: biological_process", "Cluster 16: regeneration & wound healing"),
  #       Cluster.name = str_replace(Cluster.name, "Cluster 18: secretion by cell", "Cluster 18: neurotransmitter and hormone secretion"),
  #       Cluster.name = str_replace(Cluster.name, "Cluster 24: external encapsulating structure organization", "Cluster 24: extracellular matrix organization"),
  #       Cluster.name = str_replace(Cluster.name, "Cluster 25: biological_process", "Cluster 25: cell organization and homeostasis")
  #        )
clustered_allDEGs_enrichedGO <- clustered_allDEGs_enrichedGO %>%
  left_join(cluster_map_fixed, by = c("GO.cluster" = "cluster"))

write.csv(clustered_allDEGs_enrichedGO,"../output_RNA/differential_expression/semantic-enrichment/CC_DE_05_clusters_named.csv")

library(DT)

# Interactive table with scrolling
datatable(clustered_allDEGs_enrichedGO,
          options = list(
            pageLength = 10,   # rows per page
            scrollX = TRUE,    # horizontal scroll if wide
            scrollY = "400px"  # vertical scroll
          ))

Plot

library(dplyr)
library(tidyr)
library(ggplot2)

# Create bidirectional data
cluster_data <- clustered_allDEGs_enrichedGO %>%
  select(Cluster.name, Oral_elim.Significant_genes, Aboral_elim.Significant_genes) %>%
  mutate(
    # Clean cluster names
    #Cluster.name = gsub("^Cluster \\d+:\\s*", "", Cluster.name),
    Oral_terms = ifelse(!is.na(Oral_elim.Significant_genes), 1, 0),
    Aboral_terms = ifelse(!is.na(Aboral_elim.Significant_genes), 1, 0)
  ) %>%
  group_by(Cluster.name) %>%
  summarise(
    Oral_terms = sum(Oral_terms),
    Aboral_terms = sum(Aboral_terms),
    .groups = "drop"
  ) %>%
  filter(Oral_terms + Aboral_terms > 0) %>%
  mutate(
    deviation = Aboral_terms - Oral_terms,
    Oral_terms = -Oral_terms  # Make negative for bidirectional plot
  ) %>%
  pivot_longer(cols = c("Oral_terms", "Aboral_terms"), 
               names_to = "Tissue", values_to = "n_terms") %>%
  mutate(Tissue = ifelse(Tissue == "Oral_terms", "Oral epidermis", "Aboral"))

datatable(cluster_data,
          options = list(
            pageLength = 10,   # rows per page
            scrollX = TRUE,    # horizontal scroll if wide
            scrollY = "400px"  # vertical scroll
          ))
# Calculate transition points for dashed lines
transition_data <- cluster_data %>%
  distinct(Cluster.name, deviation) %>%
  arrange(deviation)

oral_to_equal <- sum(transition_data$deviation >= 0) + 0.5
equal_to_aboral <- sum(transition_data$deviation > 0) + 0.5

# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
  geom_col(width = 0.7, alpha = 0.8) +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
  geom_vline(xintercept = c(oral_to_equal, equal_to_aboral), 
             color = "grey60", linetype = "dashed", linewidth = 0.6) +
  scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) + #green: "#2E8B57"
  coord_flip() +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12)
  ) +
  scale_y_continuous(labels = abs) +
  labs(
    x = "Gene clusters",
    y = "Number of enriched GO terms",
    title = "Tissue-specific GO term enrichment patterns",
    subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
  )

ggsave("../output_RNA/differential_expression/semantic-enrichment/CC_GO_enrichment_bidirectional.png", p, width = 10, height = 8, dpi = 300)

print(p)

# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
  geom_col(width = 0.7, alpha = 0.8) +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
  geom_vline(xintercept = c(oral_to_equal, equal_to_aboral), 
             color = "grey60", linetype = "dashed", linewidth = 0.6) +
  scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) +  #green: "#2E8B57"
  coord_flip() +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12, hjust=0)
  ) +
  scale_y_continuous(labels = abs) +
  scale_x_discrete(position = "top") +
  labs(
    x = "Gene clusters",
    y = "Number of enriched GO terms",
    title = "Tissue-specific GO term enrichment patterns",
    subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
  )

ggsave("../output_RNA/differential_expression/semantic-enrichment/CC_GO_enrichment_bidirectional_rightaxis.png", p, width = 10, height = 8, dpi = 300)

print(p)

# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
  geom_col(width = 0.7, alpha = 0.8) +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
  geom_vline(xintercept = c(oral_to_equal, equal_to_aboral), 
             color = "grey60", linetype = "dashed", linewidth = 0.6) +
  scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) + #green: "#2E8B57"
  #coord_flip() +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "right",
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(size = 12,angle = 65, hjust = 1),
    axis.text.y = element_text(size = 12)
  ) +
  scale_y_continuous(labels = abs) +
  labs(
    x = "Gene clusters",
    y = "Number of enriched GO terms",
    title = "Tissue-specific GO term enrichment patterns",
    subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
  )

p

ggsave("../output_RNA/differential_expression/semantic-enrichment/CC_GO_enrichment_bidirectional_horiz.png", p, width = 12, height = 8, dpi = 300)

MF

Oral Epidermis:

create topGO objects and perform enrichment using topGO wrapped by ViSEAGO

# create viseago object
selection <- names(selection_Oral)[selection_Oral==1]
background <- names(expressed)

MF_Oral <- ViSEAGO::create_topGOdata(
    geneSel=selection,
    allGenes=background,
    gene2GO=myGENE2GO_Pacuta, 
    ont="MF",
    nodeSize=5
)
## 
## Building most specific GOs .....
##  ( 3360 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 3929 GO terms and 5138 relations. )
## 
## Annotating nodes ...............
##  ( 9061 genes annotated to the GO terms. )
# perform TopGO test using elim algorithm
elim_Oral <- topGO::runTest(
    MF_Oral,
    algorithm ="elim",
    statistic = "fisher"
)
## 
##           -- Elim Algorithm -- 
## 
##       the algorithm is scoring 949 nontrivial nodes
##       parameters: 
##           test statistic: fisher
##           cutOff: 0.01
## 
##   Level 12:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 11:  8 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  15 nodes to be scored   (9 eliminated genes)
## 
##   Level 9:   40 nodes to be scored   (35 eliminated genes)
## 
##   Level 8:   82 nodes to be scored   (62 eliminated genes)
## 
##   Level 7:   132 nodes to be scored  (563 eliminated genes)
## 
##   Level 6:   198 nodes to be scored  (1099 eliminated genes)
## 
##   Level 5:   217 nodes to be scored  (1184 eliminated genes)
## 
##   Level 4:   184 nodes to be scored  (1485 eliminated genes)
## 
##   Level 3:   52 nodes to be scored   (1485 eliminated genes)
## 
##   Level 2:   18 nodes to be scored   (1562 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (1605 eliminated genes)
MF_Results <- ViSEAGO::merge_enrich_terms(
    Input = list( Oral_elim = c("MF_Oral", "elim_Oral")))
## 'select()' returned 1:1 mapping between keys and columns

Visualize and save initial results

# display the merged table
MF_Results
## - object class: enrich_GO_terms
## - ontology: MF
## - method: topGO
## - summary:
##  Oral_elim
##       MF_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 826
##         feasible_genes: 9061
##         feasible_genes_significant: 700
##         genes_nodeSize: 5
##         nodes_number: 1460
##         edges_number: 1914
##       elim_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 1460
##         GO_significant: 31
##         feasible_genes: 9061
##         feasible_genes_significant: 700
##         genes_nodeSize: 5
##         Nontrivial_nodes: 949 
##  - enrichment pvalue cutoff:
##         Oral_elim : 0.01
## - enrich GOs (in at least one list): 31 GO terms of 1 conditions.
##         Oral_elim : 31 terms
ViSEAGO::show_table(MF_Results)
# print the merged table in a file
ViSEAGO::show_table(
    MF_Results,
    "../output_RNA/differential_expression/semantic-enrichment/MF_DE_05_Oral.tsv"
)
  • enrichment pvalue cutoff: Oral_elim : 0.01
  • enrich GOs (in at least one list): Oral_elim : 32 terms

Semantic similarity –> Cluster: Use Wang graph-based method to identify similarity between GO terms

# initialize 
myGOs<-ViSEAGO::build_GO_SS(
    gene2GO=myGENE2GO_Pacuta,
    enrich_GO_terms=MF_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
    myGOs,
   distance=c("Wang")
)

myGOs
## - object class: GO_SS
## - ontology: MF
## - method: topGO
## - summary:
## Oral_elim
##       MF_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 826
##         feasible_genes: 9061
##         feasible_genes_significant: 700
##         genes_nodeSize: 5
##         nodes_number: 1460
##         edges_number: 1914
##       elim_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 1460
##         GO_significant: 31
##         feasible_genes: 9061
##         feasible_genes_significant: 700
##         genes_nodeSize: 5
##         Nontrivial_nodes: 949 
##  - enrichment pvalue cutoff:
##         Oral_elim : 0.01
## - enrich GOs (in at least one list): 31 GO terms of 1 conditions.
##         Oral_elim : 31 terms
## - terms distances:  Wang

Multi Dimensional Scaling Plot of the GO-terms based on semantic similarity

# display MDSplot
ViSEAGO::MDSplot(myGOs,
                 "GOterms")

Heatmap + table of GO terms, with clusters aggregated by hclust with ward.D2 method

# GOterms heatmap with the default parameters
Wang_clusters_wardD2_Oral <- ViSEAGO::GOterms_heatmap(
  myGOs,
  showIC = TRUE,
  showGOlabels = TRUE,
  GO.tree = list(
    tree = list(distance = "Wang", aggreg.method = "ward.D2"),
    cut = list(
      dynamic = list(
        pamStage = TRUE,
        pamRespectsDendro = TRUE,
        deepSplit = 2,
        minClusterSize = 2
      )
    )
  ),
  samples.tree = NULL
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2_Oral,
    "GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2_Oral)
# Print the clusters-heatmap table
ViSEAGO::show_table(
    Wang_clusters_wardD2_Oral,
    "../output_RNA/differential_expression/semantic-enrichment/MF_DE_05_Oral_cluster_heatmap_Wang_wardD2.tsv"
)

Multi Dimensional Scaling Plot of the enriched GO-terms based on semantic similarity, colored by cluster

# display colored MDSplot
ViSEAGO::MDSplot(
    Wang_clusters_wardD2_Oral,
    "GOterms")

Further summarizing the Semantic Similarity-based clusters

# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2_Oral<-ViSEAGO::compute_SS_distances(
    Wang_clusters_wardD2_Oral,
    distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned 1:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
    Wang_clusters_wardD2_Oral,
    "GOclusters")
# GOclusters heatmap
Wang_clusters_wardD2_Oral <-ViSEAGO::GOclusters_heatmap(
    Wang_clusters_wardD2_Oral,
    tree=list(
        distance="BMA",
        aggreg.method="ward.D2"
    )
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2_Oral,
    "GOclusters")

Aboral Epidermis:

create topGO objects and perform enrichment using topGO wrapped by ViSEAGO

# create viseago object
selection <- names(selection_Aboral)[selection_Aboral==1]
background <- names(expressed)

MF_Aboral <- ViSEAGO::create_topGOdata(
    geneSel=selection,
    allGenes=background,
    gene2GO=myGENE2GO_Pacuta, 
    ont="MF",
    nodeSize=5
)
## 
## Building most specific GOs .....
##  ( 3360 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 3929 GO terms and 5138 relations. )
## 
## Annotating nodes ...............
##  ( 9061 genes annotated to the GO terms. )
# perform TopGO test using elim algorithm
elim_Aboral <- topGO::runTest(
    MF_Aboral,
    algorithm ="elim",
    statistic = "fisher"
)
## 
##           -- Elim Algorithm -- 
## 
##       the algorithm is scoring 577 nontrivial nodes
##       parameters: 
##           test statistic: fisher
##           cutOff: 0.01
## 
##   Level 11:  3 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  5 nodes to be scored    (0 eliminated genes)
## 
##   Level 9:   17 nodes to be scored   (0 eliminated genes)
## 
##   Level 8:   40 nodes to be scored   (30 eliminated genes)
## 
##   Level 7:   78 nodes to be scored   (142 eliminated genes)
## 
##   Level 6:   119 nodes to be scored  (704 eliminated genes)
## 
##   Level 5:   136 nodes to be scored  (957 eliminated genes)
## 
##   Level 4:   121 nodes to be scored  (1045 eliminated genes)
## 
##   Level 3:   41 nodes to be scored   (1102 eliminated genes)
## 
##   Level 2:   16 nodes to be scored   (1571 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (1571 eliminated genes)
MF_Results <- ViSEAGO::merge_enrich_terms(
    Input = list(Aboral_elim = c("MF_Aboral", "elim_Aboral"))
)
## 'select()' returned 1:1 mapping between keys and columns

Visualize and save initial results

# display the merged table
MF_Results
## - object class: enrich_GO_terms
## - ontology: MF
## - method: topGO
## - summary:
##  Aboral_elim
##       MF_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 376
##         feasible_genes: 9061
##         feasible_genes_significant: 329
##         genes_nodeSize: 5
##         nodes_number: 1460
##         edges_number: 1914
##       elim_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 1460
##         GO_significant: 28
##         feasible_genes: 9061
##         feasible_genes_significant: 329
##         genes_nodeSize: 5
##         Nontrivial_nodes: 577 
##  - enrichment pvalue cutoff:
##         Aboral_elim : 0.01
## - enrich GOs (in at least one list): 28 GO terms of 1 conditions.
##         Aboral_elim : 28 terms
ViSEAGO::show_table(MF_Results)
# print the merged table in a file
ViSEAGO::show_table(
    MF_Results,
    "../output_RNA/differential_expression/semantic-enrichment/MF_DE_05_Aboral.tsv"
)
  • enrichment pvalue cutoff: Aboral_elim : 0.01
  • enrich GOs (in at least one list): Aboral_elim : 28 terms

Semantic similarity –> Cluster: Use Wang graph-based method to identify similarity between GO terms

# initialize 
myGOs<-ViSEAGO::build_GO_SS(
    gene2GO=myGENE2GO_Pacuta,
    enrich_GO_terms=MF_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
    myGOs,
   distance=c("Wang")
)

myGOs
## - object class: GO_SS
## - ontology: MF
## - method: topGO
## - summary:
## Aboral_elim
##       MF_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 376
##         feasible_genes: 9061
##         feasible_genes_significant: 329
##         genes_nodeSize: 5
##         nodes_number: 1460
##         edges_number: 1914
##       elim_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 1460
##         GO_significant: 28
##         feasible_genes: 9061
##         feasible_genes_significant: 329
##         genes_nodeSize: 5
##         Nontrivial_nodes: 577 
##  - enrichment pvalue cutoff:
##         Aboral_elim : 0.01
## - enrich GOs (in at least one list): 28 GO terms of 1 conditions.
##         Aboral_elim : 28 terms
## - terms distances:  Wang

Multi Dimensional Scaling Plot of the GO-terms based on semantic similarity

# display MDSplot
ViSEAGO::MDSplot(myGOs,
                 "GOterms")

Heatmap + table of GO terms, with clusters aggregated by hclust with ward.D2 method

# GOterms heatmap with the default parameters
Wang_clusters_wardD2_Aboral <- ViSEAGO::GOterms_heatmap(
  myGOs,
  showIC = TRUE,
  showGOlabels = TRUE,
  GO.tree = list(
    tree = list(distance = "Wang", aggreg.method = "ward.D2"),
    cut = list(
      dynamic = list(
        pamStage = TRUE,
        pamRespectsDendro = TRUE,
        deepSplit = 2,
        minClusterSize = 2
      )
    )
  ),
  samples.tree = NULL
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2_Aboral,
    "GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2_Aboral)
# Print the clusters-heatmap table
ViSEAGO::show_table(
    Wang_clusters_wardD2_Aboral,
    "../output_RNA/differential_expression/semantic-enrichment/MF_DE_05_Aboral_cluster_heatmap_Wang_wardD2.tsv"
)

Multi Dimensional Scaling Plot of the enriched GO-terms based on semantic similarity, colored by cluster

# display colored MDSplot
ViSEAGO::MDSplot(
    Wang_clusters_wardD2_Aboral,
    "GOterms")

Further summarizing the Semantic Similarity-based clusters

# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2_Aboral<-ViSEAGO::compute_SS_distances(
    Wang_clusters_wardD2_Aboral,
    distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned 1:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
    Wang_clusters_wardD2_Aboral,
    "GOclusters")
# GOclusters heatmap
Wang_clusters_wardD2_Aboral<-ViSEAGO::GOclusters_heatmap(
    Wang_clusters_wardD2_Aboral,
    tree=list(
        distance="BMA",
        aggreg.method="ward.D2"
    )
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2_Aboral,
    "GOclusters")

Plotting of both tissues, pre-clustered by tissue

Custom plotting of results

clustered_Oral_DEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2_Oral@enrich_GOs@data) 

cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2_Oral,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>% 
  mutate(
    cluster = str_extract(cluster_full, "\\d+"),
    Cluster.name = str_replace(cluster_full,".*?_.*?_",""),
    Cluster.term = str_replace(cluster_full,"^\\d+_",""),
    Cluster.term = str_replace(Cluster.term,"_.*$","")
  ) #%>%
  #mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) #%>%
  #dplyr::select(cluster, Cluster.name)

clustered_Oral_DEGs_enrichedGO <- clustered_Oral_DEGs_enrichedGO %>%
  left_join(cluster_map, by = c("GO.cluster" = "cluster")) %>%
  mutate(Tissue="Oral")

clustered_Aboral_DEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2_Aboral@enrich_GOs@data)

cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2_Aboral,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>% 
  mutate(
    cluster = str_extract(cluster_full, "\\d+"),
    Cluster.name = str_replace(cluster_full,".*?_.*?_",""),
    Cluster.term = str_replace(cluster_full,"^\\d+_",""),
    Cluster.term = str_replace(Cluster.term,"_.*$","")
  ) #%>%
  #mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) #%>%
  #dplyr::select(cluster, Cluster.name)
clustered_Aboral_DEGs_enrichedGO <- clustered_Aboral_DEGs_enrichedGO %>%
  left_join(cluster_map, by = c("GO.cluster" = "cluster")) %>%
  mutate(Tissue="Aboral")


clustered_allDEGs_enrichedGO <- rbind(clustered_Oral_DEGs_enrichedGO %>%
                                        dplyr::select(-contains("Oral")),
                                      clustered_Aboral_DEGs_enrichedGO %>%
                                        dplyr::select(-contains("Aboral")))

Plot

library(dplyr)
library(tidyr)
library(ggplot2)

# Create bidirectional data
cluster_data <- clustered_allDEGs_enrichedGO %>%
  dplyr::select(Cluster.name, Tissue) %>%
  group_by(Cluster.name) %>%
  summarise(
    Oral_terms = sum(Tissue=="Oral"),
    Aboral_terms = sum(Tissue=="Aboral"),
    .groups = "drop"
  ) %>%
  mutate(
    deviation = Aboral_terms - Oral_terms,
    Oral_terms = -Oral_terms  # Make negative for bidirectional plot
  ) %>%
  pivot_longer(cols = c("Oral_terms", "Aboral_terms"), 
               names_to = "Tissue", values_to = "n_terms") %>%
  mutate(Tissue = ifelse(Tissue == "Oral_terms", "Oral epidermis", "Aboral"))

# Calculate transition points for dashed lines
transition_data <- cluster_data %>%
  distinct(Cluster.name, deviation) %>%
  arrange(deviation)

oral_to_equal <- sum(transition_data$deviation >= 0) + 0.5
equal_to_aboral <- sum(transition_data$deviation > 0) + 0.5

# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
  geom_col(width = 0.7, alpha = 0.8) +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
  geom_vline(xintercept = c(oral_to_equal, equal_to_aboral), 
             color = "grey60", linetype = "dashed", linewidth = 0.6) +
  scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) + #green: "#2E8B57"
  coord_flip() +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12)
  ) +
  scale_y_continuous(labels = abs) +
  labs(
    x = "Gene clusters",
    y = "Number of enriched GO terms",
    title = "Tissue-specific GO term enrichment patterns",
    subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
  )

ggsave("../output_RNA/differential_expression/semantic-enrichment/MF_GO_enrichment_bidirectional_clustered_separate.png", p, width = 10, height = 8, dpi = 300)

Both tissues together:

create topGO objects and perform enrichment using topGO wrapped by ViSEAGO

MF_Results <- ViSEAGO::merge_enrich_terms(
    Input = list(Oral_elim = c("MF_Oral", "elim_Oral"),
                 Aboral_elim = c("MF_Aboral", "elim_Aboral"))
)
## 'select()' returned 1:1 mapping between keys and columns

Visualize and save initial results

# display the merged table
MF_Results
## - object class: enrich_GO_terms
## - ontology: MF
## - method: topGO
## - summary:
##  Oral_elim
##       MF_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 826
##         feasible_genes: 9061
##         feasible_genes_significant: 700
##         genes_nodeSize: 5
##         nodes_number: 1460
##         edges_number: 1914
##       elim_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 1460
##         GO_significant: 31
##         feasible_genes: 9061
##         feasible_genes_significant: 700
##         genes_nodeSize: 5
##         Nontrivial_nodes: 949 
##  Aboral_elim
##       MF_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 376
##         feasible_genes: 9061
##         feasible_genes_significant: 329
##         genes_nodeSize: 5
##         nodes_number: 1460
##         edges_number: 1914
##       elim_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 1460
##         GO_significant: 28
##         feasible_genes: 9061
##         feasible_genes_significant: 329
##         genes_nodeSize: 5
##         Nontrivial_nodes: 577 
##  - enrichment pvalue cutoff:
##         Oral_elim : 0.01
##         Aboral_elim : 0.01
## - enrich GOs (in at least one list): 58 GO terms of 2 conditions.
##         Oral_elim : 31 terms
##         Aboral_elim : 28 terms
ViSEAGO::show_table(MF_Results)
# print the merged table in a file
ViSEAGO::show_table(
    MF_Results,
    "../output_RNA/differential_expression/semantic-enrichment/MF_DE_05.tsv"
)
  • enrichment pvalue cutoff: Oral_elim : 0.01 Aboral_elim : 0.01
  • enrich GOs (in at least one list): 59 GO terms of 2 conditions. Oral_elim : 32 terms Aboral_elim : 28 terms

Semantic similarity –> Cluster: Use Wang graph-based method to identify similarity between GO terms

# initialize 
myGOs<-ViSEAGO::build_GO_SS(
    gene2GO=myGENE2GO_Pacuta,
    enrich_GO_terms=MF_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
    myGOs,
   distance=c("Wang")
)

myGOs
## - object class: GO_SS
## - ontology: MF
## - method: topGO
## - summary:
## Oral_elim
##       MF_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 826
##         feasible_genes: 9061
##         feasible_genes_significant: 700
##         genes_nodeSize: 5
##         nodes_number: 1460
##         edges_number: 1914
##       elim_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 1460
##         GO_significant: 31
##         feasible_genes: 9061
##         feasible_genes_significant: 700
##         genes_nodeSize: 5
##         Nontrivial_nodes: 949 
##  Aboral_elim
##       MF_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 376
##         feasible_genes: 9061
##         feasible_genes_significant: 329
##         genes_nodeSize: 5
##         nodes_number: 1460
##         edges_number: 1914
##       elim_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 1460
##         GO_significant: 28
##         feasible_genes: 9061
##         feasible_genes_significant: 329
##         genes_nodeSize: 5
##         Nontrivial_nodes: 577 
##  - enrichment pvalue cutoff:
##         Oral_elim : 0.01
##         Aboral_elim : 0.01
## - enrich GOs (in at least one list): 58 GO terms of 2 conditions.
##         Oral_elim : 31 terms
##         Aboral_elim : 28 terms
## - terms distances:  Wang

Multi Dimensional Scaling Plot of the GO-terms based on semantic similarity

# display MDSplot
ViSEAGO::MDSplot(myGOs,
                 "GOterms")

Heatmap + table of GO terms, with clusters aggregated by hclust with ward.D2 method

# GOterms heatmap with the default parameters
Wang_clusters_wardD2 <- ViSEAGO::GOterms_heatmap(
  myGOs,
  showIC = TRUE,
  showGOlabels = TRUE,
  GO.tree = list(
    tree = list(distance = "Wang", aggreg.method = "ward.D2"),
    cut = list(
      dynamic = list(
        pamStage = TRUE,
        pamRespectsDendro = TRUE,
        deepSplit = 2,
        minClusterSize = 4
      )
    )
  ),
  samples.tree = NULL
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2,
    "GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2)
# Print the clusters-heatmap table
ViSEAGO::show_table(
    Wang_clusters_wardD2,
    "../output_RNA/differential_expression/semantic-enrichment/MF_DE_05_cluster_heatmap_Wang_wardD2.tsv"
)

Multi Dimensional Scaling Plot of the enriched GO-terms based on semantic similarity, colored by cluster

# display colored MDSplot
ViSEAGO::MDSplot(
    Wang_clusters_wardD2,
    "GOterms")

Further summarizing the Semantic Similarity-based clusters

# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2<-ViSEAGO::compute_SS_distances(
    Wang_clusters_wardD2,
    distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned 1:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
    Wang_clusters_wardD2,
    "GOclusters")
# GOclusters heatmap
Wang_clusters_wardD2<-ViSEAGO::GOclusters_heatmap(
    Wang_clusters_wardD2,
    tree=list(
        distance="BMA",
        aggreg.method="ward.D2"
    )
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2,
    "GOclusters")

Custom plotting of results

clustered_allDEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2@enrich_GOs@data)

cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>% 
  mutate(
    cluster = str_extract(cluster_full, "\\d+"),
    Cluster.name = str_replace(cluster_full,".*?_.*?_","")
  ) %>%
  mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) %>%
  select(cluster, Cluster.name)

cluster_map
##   cluster                                  Cluster.name
## 1       1 Cluster 1: transmembrane transporter activity
## 2       2    Cluster 2: monoatomic ion channel activity
## 3       3                    Cluster 3: protein binding
## 4       4    Cluster 4: carbohydrate derivative binding
## 5       5                 Cluster 5: molecular_function
## 6       6                            Cluster 6: binding
## 7       7        Cluster 7: signaling receptor activity
## 8       8                 Cluster 8: hydrolase activity
## 9       9                 Cluster 9: catalytic activity

Manually fix cluster names based on GO terms inside each cluster

cluster_map_fixed <- cluster_map #%>%
  # mutate(Cluster.name = str_replace(Cluster.name, "Cluster 2: developmental process", "Cluster 2: cell fate determination"),
  #        Cluster.name = str_replace(Cluster.name, "Cluster 4: anatomical structure development", "Cluster 4: regeneration & tissue morphogenesis"),
  #        Cluster.name = str_replace(Cluster.name, "Cluster 5: multicellular organism development", "Cluster 5: multicellular organism pattern formation"),
  #        Cluster.name = str_replace(Cluster.name, "Cluster 9: transport", "Cluster 9: metal ion transmembrane transport"),
  #        Cluster.name = str_replace(Cluster.name, "Cluster 10: transport", "Cluster 10: amino acid & small molecule transmembrane transport"),
  #       Cluster.name = str_replace(Cluster.name, "Cluster 12: nervous system process", "Cluster 12: sensory perception"),
  #       Cluster.name = str_replace(Cluster.name, "Cluster 16: biological_process", "Cluster 16: regeneration & wound healing"),
  #       Cluster.name = str_replace(Cluster.name, "Cluster 18: secretion by cell", "Cluster 18: neurotransmitter and hormone secretion"),
  #       Cluster.name = str_replace(Cluster.name, "Cluster 24: external encapsulating structure organization", "Cluster 24: extracellular matrix organization"),
  #       Cluster.name = str_replace(Cluster.name, "Cluster 25: biological_process", "Cluster 25: cell organization and homeostasis")
  #        )
clustered_allDEGs_enrichedGO <- clustered_allDEGs_enrichedGO %>%
  left_join(cluster_map_fixed, by = c("GO.cluster" = "cluster"))

write.csv(clustered_allDEGs_enrichedGO,"../output_RNA/differential_expression/semantic-enrichment/MF_DE_05_clusters_named.csv")

library(DT)

# Interactive table with scrolling
datatable(clustered_allDEGs_enrichedGO,
          options = list(
            pageLength = 10,   # rows per page
            scrollX = TRUE,    # horizontal scroll if wide
            scrollY = "400px"  # vertical scroll
          ))

Plot

library(dplyr)
library(tidyr)
library(ggplot2)

# Create bidirectional data
cluster_data <- clustered_allDEGs_enrichedGO %>%
  select(Cluster.name, Oral_elim.Significant_genes, Aboral_elim.Significant_genes) %>%
  mutate(
    # Clean cluster names
    #Cluster.name = gsub("^Cluster \\d+:\\s*", "", Cluster.name),
    Oral_terms = ifelse(!is.na(Oral_elim.Significant_genes), 1, 0),
    Aboral_terms = ifelse(!is.na(Aboral_elim.Significant_genes), 1, 0)
  ) %>%
  group_by(Cluster.name) %>%
  summarise(
    Oral_terms = sum(Oral_terms),
    Aboral_terms = sum(Aboral_terms),
    .groups = "drop"
  ) %>%
  filter(Oral_terms + Aboral_terms > 0) %>%
  mutate(
    deviation = Aboral_terms - Oral_terms,
    Oral_terms = -Oral_terms  # Make negative for bidirectional plot
  ) %>%
  pivot_longer(cols = c("Oral_terms", "Aboral_terms"), 
               names_to = "Tissue", values_to = "n_terms") %>%
  mutate(Tissue = ifelse(Tissue == "Oral_terms", "Oral epidermis", "Aboral"))

datatable(cluster_data,
          options = list(
            pageLength = 10,   # rows per page
            scrollX = TRUE,    # horizontal scroll if wide
            scrollY = "400px"  # vertical scroll
          ))
# Calculate transition points for dashed lines
transition_data <- cluster_data %>%
  distinct(Cluster.name, deviation) %>%
  arrange(deviation)

oral_to_equal <- sum(transition_data$deviation >= 0) + 0.5
equal_to_aboral <- sum(transition_data$deviation > 0) + 0.5

# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
  geom_col(width = 0.7, alpha = 0.8) +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
  geom_vline(xintercept = c(oral_to_equal, equal_to_aboral), 
             color = "grey60", linetype = "dashed", linewidth = 0.6) +
  scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) + #green: "#2E8B57"
  coord_flip() +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12)
  ) +
  scale_y_continuous(labels = abs) +
  labs(
    x = "Gene clusters",
    y = "Number of enriched GO terms",
    title = "Tissue-specific GO term enrichment patterns",
    subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
  )

ggsave("../output_RNA/differential_expression/semantic-enrichment/MF_GO_enrichment_bidirectional.png", p, width = 10, height = 8, dpi = 300)

print(p)

# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
  geom_col(width = 0.7, alpha = 0.8) +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
  geom_vline(xintercept = c(oral_to_equal, equal_to_aboral), 
             color = "grey60", linetype = "dashed", linewidth = 0.6) +
  scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) +  #green: "#2E8B57"
  coord_flip() +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12, hjust=0)
  ) +
  scale_y_continuous(labels = abs) +
  scale_x_discrete(position = "top") +
  labs(
    x = "Gene clusters",
    y = "Number of enriched GO terms",
    title = "Tissue-specific GO term enrichment patterns",
    subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
  )

ggsave("../output_RNA/differential_expression/semantic-enrichment/MF_GO_enrichment_bidirectional_rightaxis.png", p, width = 10, height = 8, dpi = 300)

print(p)

# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
  geom_col(width = 0.7, alpha = 0.8) +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
  geom_vline(xintercept = c(oral_to_equal, equal_to_aboral), 
             color = "grey60", linetype = "dashed", linewidth = 0.6) +
  scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) + #green: "#2E8B57"
  #coord_flip() +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "right",
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(size = 12,angle = 65, hjust = 1),
    axis.text.y = element_text(size = 12)
  ) +
  scale_y_continuous(labels = abs) +
  labs(
    x = "Gene clusters",
    y = "Number of enriched GO terms",
    title = "Tissue-specific GO term enrichment patterns",
    subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
  )

p

ggsave("../output_RNA/differential_expression/semantic-enrichment/MF_GO_enrichment_bidirectional_horiz.png", p, width = 12, height = 8, dpi = 300)

Combine everything into one big plot:

BP_GOs_Aboral <- read.delim(file="../output_RNA/differential_expression/semantic-enrichment/DE_05_Aboral.tsv") %>% mutate(ontology="BP")
MF_GOs_Aboral <- read.delim(file="../output_RNA/differential_expression/semantic-enrichment/MF_DE_05_Aboral.tsv")   %>% mutate(ontology="MF")
CC_GOs_Aboral <- read.delim(file="../output_RNA/differential_expression/semantic-enrichment/CC_DE_05_Aboral.tsv") %>% mutate(ontology="CC")

BP_GOs_Oral <- read.delim(file="../output_RNA/differential_expression/semantic-enrichment/DE_05_Oral.tsv") %>% mutate(ontology="BP")
MF_GOs_Oral <- read.delim(file="../output_RNA/differential_expression/semantic-enrichment/MF_DE_05_Oral.tsv") %>% mutate(ontology="MF")
CC_GOs_Oral <- read.delim(file="../output_RNA/differential_expression/semantic-enrichment/CC_DE_05_Oral.tsv") %>% mutate(ontology="CC")

oral_combined <- rbind(BP_GOs_Oral,MF_GOs_Oral,CC_GOs_Oral) %>%
                        rename_with(~ str_replace_all(.x, "Oral_elim|\\.", "")) %>% mutate(Tissue="Oral")
aboral_combined <- rbind(BP_GOs_Aboral,MF_GOs_Aboral,CC_GOs_Aboral) %>%
                        rename_with(~ str_replace_all(.x, "Aboral_elim|\\.", "")) %>% mutate(Tissue="Aboral")

tissues_combined <- rbind(oral_combined,aboral_combined) %>%
                          mutate(freq = as.numeric(str_remove(genes_frequency,pattern="\\%.*")))
aboral_cols <- c(
  BP = "#756BB1",
  CC = "#9E9AC8",
  MF = "#CBC9E2"
)

oral_cols <- c(
  BP = "#FD8D3C",
  CC = "#FDAE6B",
  MF = "#FDD0A2"
)

top10_pval <- tissues_combined %>%
  filter(ontology=="BP") %>%
  group_by(Tissue,ontology) %>%
  slice_max(order_by = desc(pvalue), n = 30, with_ties = FALSE) %>% 
  arrange(Tissue, ontology, log10_pvalue) %>%
  ungroup() %>%
  mutate(term=str_wrap(`term`, width = 40))

p1 <- top10_pval %>% filter(Tissue == "Aboral") %>%
  mutate(term = fct_reorder(term, log10_pvalue)) %>% 
  ggplot(aes(x = term, y = log10_pvalue, fill = ontology)) +
  geom_col(width = .8, fill="#756BB1", alpha = 0.8) +
  facet_grid(ontology ~ ., scales = "free_y", space = "free_y") +
  coord_flip() +
 # scale_fill_manual(values = aboral_cols) +
  theme_bw(base_size = 9) +
  #scale_y_continuous(limits = c(0, 35), expand = expansion(mult = c(0, 0.05)))+
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
    axis.title.y = element_blank(),
    axis.text.y = element_text(size = 7, lineheight = 0.7,color = "black"),
    strip.text = element_text(face = "bold", size = 9),
    strip.background = element_rect(fill = "white", color = "black"),
    legend.position = "none",
    plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
    panel.spacing.y = unit(0.2, "lines")
  ) + labs(subtitle = "Aboral", y = "-log10(p-value)")

p2 <- top10_pval %>% filter(Tissue == "Oral") %>%
  mutate(term = fct_reorder(term, log10_pvalue)) %>% 
  ggplot(aes(x = term, y = log10_pvalue, fill = ontology)) +
  geom_col(width = .8, fill="#FD8D3C", alpha = 0.8) +
  facet_grid(ontology ~ ., scales = "free_y", space = "free_y") +
  coord_flip() +
 # scale_fill_manual(values = oral_cols) +
 # scale_y_continuous(limits = c(0, 35), expand = expansion(mult = c(0, 0.05)))+
  theme_bw(base_size = 9) +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
    axis.title.y = element_blank(),
    axis.text.y = element_text(size = 7, lineheight = 0.7,color = "black"),
    strip.text = element_text(face = "bold", size = 9),
     strip.background = element_rect(fill = "white", color = "black"),
    legend.position = "none",
    plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
    panel.spacing.y = unit(0.2, "lines")
  ) + labs(subtitle = "Oral epidermis", y = "-log10(p-value)")

plot <- p1 + p2 +
  plot_annotation(
    title = "Top 30 enriched GO terms by p-value",
    theme = theme(plot.title = element_text(face = "bold", size = 9, hjust = 0.5))
  )

ggsave("../output_RNA/differential_expression/semantic-enrichment/Top_BP_30.png", plot, width = 7, height = 6.5, dpi = 300)
top10_freq <- tissues_combined %>% group_by(Tissue,ontology) %>%
  slice_max(order_by = freq, n = 10, with_ties = FALSE) %>% 
  arrange(Tissue, ontology, log10_pvalue) %>%
  ungroup() %>%
  mutate(term=str_wrap(`term`, width = 40))


p1 <- top10_freq %>% filter(Tissue == "Aboral") %>%
  mutate(term = fct_reorder(term, freq)) %>% 
  ggplot(aes(x = term, y = freq, fill = ontology)) +
  geom_col(width = 0.7, alpha = 0.85) +
  facet_grid(ontology ~ ., scales = "free_y", space = "free_y") +
  coord_flip() +
  scale_fill_manual(values = aboral_cols) +
  theme_bw(base_size = 9) +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
    axis.title.y = element_blank(),
    axis.text.y = element_text(size = 7, lineheight = 0.7,color = "black"),
    strip.text = element_text(face = "bold", size = 9),
     strip.background = element_rect(fill = "white", color = "black"),
    legend.position = "none",
    plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
    panel.spacing.y = unit(0.2, "lines")
  ) + labs(subtitle = "Aboral", y = "% DEGs with GO term of\nall genes with GO term")

p2 <- top10_freq %>% filter(Tissue == "Oral") %>%
  mutate(term = fct_reorder(term, freq)) %>% 
  ggplot(aes(x = term, y = freq, fill = ontology)) +
  geom_col(width = 0.7, alpha = 0.85) +
  facet_grid(ontology ~ ., scales = "free_y", space = "free_y") +
  coord_flip() +
  scale_fill_manual(values = oral_cols) +
  theme_bw(base_size = 9) +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
    axis.title.y = element_blank(),
    axis.text.y = element_text(size = 7, lineheight = 0.7,color = "black"),
    strip.text = element_text(face = "bold", size = 9),
     strip.background = element_rect(fill = "white", color = "black"),
    legend.position = "none",
    plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
    panel.spacing.y = unit(0.2, "lines")
  ) + labs(subtitle = "Oral epidermis", y = "% DEGs with GO term of\nall genes with GO term")


plot <- p1 + p2 +
  plot_annotation(
    title = "Top 10 enriched GO terms by frequency in tissue DEGs",
    theme = theme(plot.title = element_text(face = "bold", size = 9, hjust = 0.5))
  )

ggsave("../output_RNA/differential_expression/semantic-enrichment/All_Onts_frq.png", plot, width = 7, height = 6.5, dpi = 300)
# Create gradient scales for each tissue
aboral_gradient <- colorRampPalette(c("#CBC9E2", "#756BB1"))(100)
oral_gradient <- colorRampPalette(c("#FDD0A2", "#FD8D3C"))(100)

p1 <- top10_freq %>% filter(Tissue == "Aboral") %>%
  mutate(term = fct_reorder(term, freq)) %>% 
  ggplot(aes(x = term, y = freq, fill = log10_pvalue)) +
  geom_col(width = 0.7, alpha = 0.85) +
  facet_grid(ontology ~ ., scales = "free_y", space = "free_y") +
  coord_flip() +
  scale_fill_gradientn(colors = aboral_gradient, name = "-log10(p-value)") +
  theme_bw(base_size = 9) +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
    axis.title.y = element_blank(),
    axis.text.y = element_text(size = 7, lineheight = 0.7, color = "black"),
    strip.text = element_text(face = "bold", size = 9),
    strip.background = element_rect(fill = "white", color = "black"),
    legend.position = "right",
    legend.key.height = unit(0.8, "cm"),
    legend.key.width = unit(0.3, "cm"),
    plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
    panel.spacing.y = unit(0.2, "lines")
  ) + labs(subtitle = "Aboral", y = "% DEGs with GO term of\nall genes with GO term")

p2 <- top10_freq %>% filter(Tissue == "Oral") %>%
  mutate(term = fct_reorder(term, freq)) %>% 
  ggplot(aes(x = term, y = freq, fill = log10_pvalue)) +
  geom_col(width = 0.7, alpha = 0.85) +
  facet_grid(ontology ~ ., scales = "free_y", space = "free_y") +
  coord_flip() +
  scale_fill_gradientn(colors = oral_gradient, name = "-log10(p-value)") +
  theme_bw(base_size = 9) +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
    axis.title.y = element_blank(),
    axis.text.y = element_text(size = 7, lineheight = 0.7, color = "black"),
    strip.text = element_text(face = "bold", size = 9),
    strip.background = element_rect(fill = "white", color = "black"),
    legend.position = "right",
    legend.key.height = unit(0.8, "cm"),
    legend.key.width = unit(0.3, "cm"),
    plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
    panel.spacing.y = unit(0.2, "lines")
  ) + labs(subtitle = "Oral epidermis", y = "% DEGs with GO term of\nall genes with GO term")

plot <- p1 + p2 +
  plot_annotation(
    title = "Top 10 enriched GO terms by frequency in tissue DEGs",
    theme = theme(plot.title = element_text(face = "bold", size = 9, hjust = 0.5))
  )

ggsave("../output_RNA/differential_expression/semantic-enrichment/All_Onts_freq_pval.png", plot, width = 8, height = 6.5, dpi = 300)